119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5308009a76SAlexey Dobriyan #include <linux/cache.h> 547c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 551977f032SJiri Slaby #include <linux/bitops.h> 5619baf839SRobert Olsson #include <linux/types.h> 5719baf839SRobert Olsson #include <linux/kernel.h> 5819baf839SRobert Olsson #include <linux/mm.h> 5919baf839SRobert Olsson #include <linux/string.h> 6019baf839SRobert Olsson #include <linux/socket.h> 6119baf839SRobert Olsson #include <linux/sockios.h> 6219baf839SRobert Olsson #include <linux/errno.h> 6319baf839SRobert Olsson #include <linux/in.h> 6419baf839SRobert Olsson #include <linux/inet.h> 65cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6619baf839SRobert Olsson #include <linux/netdevice.h> 6719baf839SRobert Olsson #include <linux/if_arp.h> 6819baf839SRobert Olsson #include <linux/proc_fs.h> 692373ce1cSRobert Olsson #include <linux/rcupdate.h> 7019baf839SRobert Olsson #include <linux/skbuff.h> 7119baf839SRobert Olsson #include <linux/netlink.h> 7219baf839SRobert Olsson #include <linux/init.h> 7319baf839SRobert Olsson #include <linux/list.h> 745a0e3ad6STejun Heo #include <linux/slab.h> 75bc3b2d7fSPaul Gortmaker #include <linux/export.h> 76ffa915d0SDavid S. Miller #include <linux/vmalloc.h> 77b90eb754SJiri Pirko #include <linux/notifier.h> 78457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7919baf839SRobert Olsson #include <net/ip.h> 8019baf839SRobert Olsson #include <net/protocol.h> 8119baf839SRobert Olsson #include <net/route.h> 8219baf839SRobert Olsson #include <net/tcp.h> 8319baf839SRobert Olsson #include <net/sock.h> 8419baf839SRobert Olsson #include <net/ip_fib.h> 8504b1d4e5SIdo Schimmel #include <net/fib_notifier.h> 86f6d3c192SDavid Ahern #include <trace/events/fib.h> 8719baf839SRobert Olsson #include "fib_lookup.h" 8819baf839SRobert Olsson 89c3852ef7SIdo Schimmel static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, 90c3852ef7SIdo Schimmel enum fib_event_type event_type, u32 dst, 916eba87c7SDavid Ahern int dst_len, struct fib_alias *fa) 92c3852ef7SIdo Schimmel { 93c3852ef7SIdo Schimmel struct fib_entry_notifier_info info = { 94c3852ef7SIdo Schimmel .dst = dst, 95c3852ef7SIdo Schimmel .dst_len = dst_len, 966eba87c7SDavid Ahern .fi = fa->fa_info, 976eba87c7SDavid Ahern .tos = fa->fa_tos, 986eba87c7SDavid Ahern .type = fa->fa_type, 996eba87c7SDavid Ahern .tb_id = fa->tb_id, 100c3852ef7SIdo Schimmel }; 10104b1d4e5SIdo Schimmel return call_fib4_notifier(nb, net, event_type, &info.info); 102c3852ef7SIdo Schimmel } 103c3852ef7SIdo Schimmel 104b90eb754SJiri Pirko static int call_fib_entry_notifiers(struct net *net, 105b90eb754SJiri Pirko enum fib_event_type event_type, u32 dst, 1066c31e5a9SDavid Ahern int dst_len, struct fib_alias *fa, 1076c31e5a9SDavid Ahern struct netlink_ext_ack *extack) 108b90eb754SJiri Pirko { 109b90eb754SJiri Pirko struct fib_entry_notifier_info info = { 1106c31e5a9SDavid Ahern .info.extack = extack, 111b90eb754SJiri Pirko .dst = dst, 112b90eb754SJiri Pirko .dst_len = dst_len, 1136eba87c7SDavid Ahern .fi = fa->fa_info, 1146eba87c7SDavid Ahern .tos = fa->fa_tos, 1156eba87c7SDavid Ahern .type = fa->fa_type, 1166eba87c7SDavid Ahern .tb_id = fa->tb_id, 117b90eb754SJiri Pirko }; 11804b1d4e5SIdo Schimmel return call_fib4_notifiers(net, event_type, &info.info); 119b90eb754SJiri Pirko } 120b90eb754SJiri Pirko 12106ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 12219baf839SRobert Olsson 12319baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 12495f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 12519baf839SRobert Olsson 12619baf839SRobert Olsson typedef unsigned int t_key; 12719baf839SRobert Olsson 12888bae714SAlexander Duyck #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) 12964c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 13064c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 1312373ce1cSRobert Olsson 13235c6edacSAlexander Duyck struct key_vector { 13341b489fdSAlexander Duyck t_key key; 13441b489fdSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 13541b489fdSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 13641b489fdSAlexander Duyck unsigned char slen; 13741b489fdSAlexander Duyck union { 13841b489fdSAlexander Duyck /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ 13979e5ad2cSAlexander Duyck struct hlist_head leaf; 14041b489fdSAlexander Duyck /* This array is valid if (pos | bits) > 0 (TNODE) */ 14135c6edacSAlexander Duyck struct key_vector __rcu *tnode[0]; 14219baf839SRobert Olsson }; 143adaf9816SAlexander Duyck }; 14419baf839SRobert Olsson 145dc35dbedSAlexander Duyck struct tnode { 14656ca2adfSAlexander Duyck struct rcu_head rcu; 1476e22d174SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 1486e22d174SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 149f23e59fbSAlexander Duyck struct key_vector __rcu *parent; 150dc35dbedSAlexander Duyck struct key_vector kv[1]; 15156ca2adfSAlexander Duyck #define tn_bits kv[0].bits 152dc35dbedSAlexander Duyck }; 153dc35dbedSAlexander Duyck 154dc35dbedSAlexander Duyck #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) 15541b489fdSAlexander Duyck #define LEAF_SIZE TNODE_SIZE(1) 15641b489fdSAlexander Duyck 15719baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 15819baf839SRobert Olsson struct trie_use_stats { 15919baf839SRobert Olsson unsigned int gets; 16019baf839SRobert Olsson unsigned int backtrack; 16119baf839SRobert Olsson unsigned int semantic_match_passed; 16219baf839SRobert Olsson unsigned int semantic_match_miss; 16319baf839SRobert Olsson unsigned int null_node_hit; 1642f36895aSRobert Olsson unsigned int resize_node_skipped; 16519baf839SRobert Olsson }; 16619baf839SRobert Olsson #endif 16719baf839SRobert Olsson 16819baf839SRobert Olsson struct trie_stat { 16919baf839SRobert Olsson unsigned int totdepth; 17019baf839SRobert Olsson unsigned int maxdepth; 17119baf839SRobert Olsson unsigned int tnodes; 17219baf839SRobert Olsson unsigned int leaves; 17319baf839SRobert Olsson unsigned int nullpointers; 17493672292SStephen Hemminger unsigned int prefixes; 17506ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 17619baf839SRobert Olsson }; 17719baf839SRobert Olsson 17819baf839SRobert Olsson struct trie { 17988bae714SAlexander Duyck struct key_vector kv[1]; 18019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1818274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 18219baf839SRobert Olsson #endif 18319baf839SRobert Olsson }; 18419baf839SRobert Olsson 18588bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn); 1869ab948a9SDavid Ahern static unsigned int tnode_free_size; 187c3059477SJarek Poplawski 188c3059477SJarek Poplawski /* 1899ab948a9SDavid Ahern * synchronize_rcu after call_rcu for outstanding dirty memory; it should be 1909ab948a9SDavid Ahern * especially useful before resizing the root node with PREEMPT_NONE configs; 1919ab948a9SDavid Ahern * the value was obtained experimentally, aiming to avoid visible slowdown. 192c3059477SJarek Poplawski */ 1939ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem = 512 * 1024; 1949ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_min = 64 * 1024; 1959ab948a9SDavid Ahern unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024; 19619baf839SRobert Olsson 19708009a76SAlexey Dobriyan static struct kmem_cache *fn_alias_kmem __ro_after_init; 19808009a76SAlexey Dobriyan static struct kmem_cache *trie_leaf_kmem __ro_after_init; 19919baf839SRobert Olsson 20056ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv) 20156ca2adfSAlexander Duyck { 20256ca2adfSAlexander Duyck return container_of(kv, struct tnode, kv[0]); 20356ca2adfSAlexander Duyck } 20456ca2adfSAlexander Duyck 20564c9b6fbSAlexander Duyck /* caller must hold RTNL */ 206f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) 207754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) 2080a5c0475SEric Dumazet 20964c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 210f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) 211754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) 2120a5c0475SEric Dumazet 21364c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 21435c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) 21506801916SStephen Hemminger { 216adaf9816SAlexander Duyck if (n) 217f23e59fbSAlexander Duyck rcu_assign_pointer(tn_info(n)->parent, tp); 21864c9b6fbSAlexander Duyck } 21964c9b6fbSAlexander Duyck 220f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) 22164c9b6fbSAlexander Duyck 22264c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 22364c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 22464c9b6fbSAlexander Duyck */ 2252e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn) 22664c9b6fbSAlexander Duyck { 22764c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 22806801916SStephen Hemminger } 2292373ce1cSRobert Olsson 23088bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) 23188bae714SAlexander Duyck 2322e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv) 2332e1ac88aSAlexander Duyck { 2342e1ac88aSAlexander Duyck unsigned long index = key ^ kv->key; 2352e1ac88aSAlexander Duyck 23688bae714SAlexander Duyck if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) 23788bae714SAlexander Duyck return 0; 23888bae714SAlexander Duyck 2392e1ac88aSAlexander Duyck return index >> kv->pos; 2402e1ac88aSAlexander Duyck } 2412e1ac88aSAlexander Duyck 242e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 243e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 244e9b44019SAlexander Duyck * all of the bits in that key are significant. 245e9b44019SAlexander Duyck * 246e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 247e9b44019SAlexander Duyck * 248e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 249e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 250e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 251e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 252e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 253e9b44019SAlexander Duyck * correct key path. 254e9b44019SAlexander Duyck * 255e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 256e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 257e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 258e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 259e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 260e9b44019SAlexander Duyck * 261e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 262e9b44019SAlexander Duyck * have many different meanings. 263e9b44019SAlexander Duyck * 264e9b44019SAlexander Duyck * Example: 265e9b44019SAlexander Duyck * _________________________________________________________________ 266e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 267e9b44019SAlexander Duyck * ----------------------------------------------------------------- 268e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 269e9b44019SAlexander Duyck * 270e9b44019SAlexander Duyck * _________________________________________________________________ 271e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 272e9b44019SAlexander Duyck * ----------------------------------------------------------------- 273e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 274e9b44019SAlexander Duyck * 275e9b44019SAlexander Duyck * tp->pos = 22 276e9b44019SAlexander Duyck * tp->bits = 3 277e9b44019SAlexander Duyck * n->pos = 13 278e9b44019SAlexander Duyck * n->bits = 4 279e9b44019SAlexander Duyck * 280e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 281e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 282e9b44019SAlexander Duyck * point we do not use them for anything. 283e9b44019SAlexander Duyck * 284e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 285e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 286e9b44019SAlexander Duyck * 'n' among tp's children. 287e9b44019SAlexander Duyck * 28898a384ecSXunlei Pang * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits 289e9b44019SAlexander Duyck * for the node n. 290e9b44019SAlexander Duyck * 291e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 292e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 293e9b44019SAlexander Duyck * 294e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 295e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 296e9b44019SAlexander Duyck * 29798a384ecSXunlei Pang * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown 298e9b44019SAlexander Duyck * at this point. 29919baf839SRobert Olsson */ 30019baf839SRobert Olsson 301f5026fabSDenis V. Lunev static const int halve_threshold = 25; 302f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 303345aa031SJarek Poplawski static const int halve_threshold_root = 15; 30480b71b80SJens Låås static const int inflate_threshold_root = 30; 3052373ce1cSRobert Olsson 3062373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3072373ce1cSRobert Olsson { 3082373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3092373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3102373ce1cSRobert Olsson } 3112373ce1cSRobert Olsson 3122373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3132373ce1cSRobert Olsson { 3142373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3152373ce1cSRobert Olsson } 3162373ce1cSRobert Olsson 31737fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 31835c6edacSAlexander Duyck ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 3191de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \ 32035c6edacSAlexander Duyck ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 32137fd30f2SAlexander Duyck 32237fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 3232373ce1cSRobert Olsson { 32456ca2adfSAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 32537fd30f2SAlexander Duyck 32656ca2adfSAlexander Duyck if (!n->tn_bits) 32737fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 32837fd30f2SAlexander Duyck else 3291d5cfdb0STetsuo Handa kvfree(n); 3302373ce1cSRobert Olsson } 3312373ce1cSRobert Olsson 33256ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 333387a5487SStephen Hemminger 334dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits) 3352373ce1cSRobert Olsson { 3361de3d87bSAlexander Duyck size_t size; 3371de3d87bSAlexander Duyck 3381de3d87bSAlexander Duyck /* verify bits is within bounds */ 3391de3d87bSAlexander Duyck if (bits > TNODE_VMALLOC_MAX) 3401de3d87bSAlexander Duyck return NULL; 3411de3d87bSAlexander Duyck 3421de3d87bSAlexander Duyck /* determine size and verify it is non-zero and didn't overflow */ 3431de3d87bSAlexander Duyck size = TNODE_SIZE(1ul << bits); 3441de3d87bSAlexander Duyck 3452373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3468d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 34715be75cdSStephen Hemminger else 3487a1c8e5aSEric Dumazet return vzalloc(size); 34915be75cdSStephen Hemminger } 3502373ce1cSRobert Olsson 35135c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n) 35295f60ea3SAlexander Duyck { 3536e22d174SAlexander Duyck ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; 35495f60ea3SAlexander Duyck } 35595f60ea3SAlexander Duyck 35635c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n) 35795f60ea3SAlexander Duyck { 3586e22d174SAlexander Duyck tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; 35995f60ea3SAlexander Duyck } 36095f60ea3SAlexander Duyck 36135c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 36219baf839SRobert Olsson { 363f38b24c9SFiro Yang struct key_vector *l; 364f38b24c9SFiro Yang struct tnode *kv; 365dc35dbedSAlexander Duyck 366f38b24c9SFiro Yang kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 367dc35dbedSAlexander Duyck if (!kv) 368dc35dbedSAlexander Duyck return NULL; 369dc35dbedSAlexander Duyck 370dc35dbedSAlexander Duyck /* initialize key vector */ 371f38b24c9SFiro Yang l = kv->kv; 37264c9b6fbSAlexander Duyck l->key = key; 373e9b44019SAlexander Duyck l->pos = 0; 37464c9b6fbSAlexander Duyck l->bits = 0; 375dc35dbedSAlexander Duyck l->slen = fa->fa_slen; 37664c9b6fbSAlexander Duyck 377d5d6487cSAlexander Duyck /* link leaf to fib alias */ 37879e5ad2cSAlexander Duyck INIT_HLIST_HEAD(&l->leaf); 379d5d6487cSAlexander Duyck hlist_add_head(&fa->fa_list, &l->leaf); 380dc35dbedSAlexander Duyck 38119baf839SRobert Olsson return l; 38219baf839SRobert Olsson } 38319baf839SRobert Olsson 38435c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits) 38519baf839SRobert Olsson { 38664c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 387f38b24c9SFiro Yang struct key_vector *tn; 388f38b24c9SFiro Yang struct tnode *tnode; 38964c9b6fbSAlexander Duyck 39064c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 39164c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 39219baf839SRobert Olsson 393f38b24c9SFiro Yang tnode = tnode_alloc(bits); 394dc35dbedSAlexander Duyck if (!tnode) 395dc35dbedSAlexander Duyck return NULL; 396dc35dbedSAlexander Duyck 397f38b24c9SFiro Yang pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 398f38b24c9SFiro Yang sizeof(struct key_vector *) << bits); 399f38b24c9SFiro Yang 40095f60ea3SAlexander Duyck if (bits == KEYLENGTH) 4016e22d174SAlexander Duyck tnode->full_children = 1; 40295f60ea3SAlexander Duyck else 4036e22d174SAlexander Duyck tnode->empty_children = 1ul << bits; 404c877efb2SStephen Hemminger 405f38b24c9SFiro Yang tn = tnode->kv; 406dc35dbedSAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 407dc35dbedSAlexander Duyck tn->pos = pos; 408dc35dbedSAlexander Duyck tn->bits = bits; 409dc35dbedSAlexander Duyck tn->slen = pos; 410dc35dbedSAlexander Duyck 41119baf839SRobert Olsson return tn; 41219baf839SRobert Olsson } 41319baf839SRobert Olsson 414e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 41519baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 41619baf839SRobert Olsson */ 41735c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n) 41819baf839SRobert Olsson { 419e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 42019baf839SRobert Olsson } 42119baf839SRobert Olsson 422ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 42319baf839SRobert Olsson * Update the value of full_children and empty_children. 42419baf839SRobert Olsson */ 42535c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i, 42635c6edacSAlexander Duyck struct key_vector *n) 42719baf839SRobert Olsson { 428754baf8dSAlexander Duyck struct key_vector *chi = get_child(tn, i); 429ff181ed8SAlexander Duyck int isfull, wasfull; 43019baf839SRobert Olsson 4312e1ac88aSAlexander Duyck BUG_ON(i >= child_length(tn)); 4320c7770c7SStephen Hemminger 43395f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 43400db4124SIan Morris if (!n && chi) 43595f60ea3SAlexander Duyck empty_child_inc(tn); 43600db4124SIan Morris if (n && !chi) 43795f60ea3SAlexander Duyck empty_child_dec(tn); 43819baf839SRobert Olsson 43919baf839SRobert Olsson /* update fullChildren */ 44019baf839SRobert Olsson wasfull = tnode_full(tn, chi); 44119baf839SRobert Olsson isfull = tnode_full(tn, n); 442ff181ed8SAlexander Duyck 44319baf839SRobert Olsson if (wasfull && !isfull) 4446e22d174SAlexander Duyck tn_info(tn)->full_children--; 44519baf839SRobert Olsson else if (!wasfull && isfull) 4466e22d174SAlexander Duyck tn_info(tn)->full_children++; 44791b9a277SOlof Johansson 4485405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4495405afd1SAlexander Duyck tn->slen = n->slen; 4505405afd1SAlexander Duyck 45141b489fdSAlexander Duyck rcu_assign_pointer(tn->tnode[i], n); 45219baf839SRobert Olsson } 45319baf839SRobert Olsson 45435c6edacSAlexander Duyck static void update_children(struct key_vector *tn) 45569fa57b1SAlexander Duyck { 45669fa57b1SAlexander Duyck unsigned long i; 45769fa57b1SAlexander Duyck 45869fa57b1SAlexander Duyck /* update all of the child parent pointers */ 4592e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 460754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 46169fa57b1SAlexander Duyck 46269fa57b1SAlexander Duyck if (!inode) 46369fa57b1SAlexander Duyck continue; 46469fa57b1SAlexander Duyck 46569fa57b1SAlexander Duyck /* Either update the children of a tnode that 46669fa57b1SAlexander Duyck * already belongs to us or update the child 46769fa57b1SAlexander Duyck * to point to ourselves. 46869fa57b1SAlexander Duyck */ 46969fa57b1SAlexander Duyck if (node_parent(inode) == tn) 47069fa57b1SAlexander Duyck update_children(inode); 47169fa57b1SAlexander Duyck else 47269fa57b1SAlexander Duyck node_set_parent(inode, tn); 47369fa57b1SAlexander Duyck } 47469fa57b1SAlexander Duyck } 47569fa57b1SAlexander Duyck 47688bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key, 47788bae714SAlexander Duyck struct key_vector *n) 478836a0123SAlexander Duyck { 47988bae714SAlexander Duyck if (IS_TRIE(tp)) 48088bae714SAlexander Duyck rcu_assign_pointer(tp->tnode[0], n); 481836a0123SAlexander Duyck else 48288bae714SAlexander Duyck put_child(tp, get_index(key, tp), n); 483836a0123SAlexander Duyck } 484836a0123SAlexander Duyck 48535c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn) 4860a5c0475SEric Dumazet { 48756ca2adfSAlexander Duyck tn_info(tn)->rcu.next = NULL; 4880a5c0475SEric Dumazet } 489fc86a93bSAlexander Duyck 49035c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn, 49135c6edacSAlexander Duyck struct key_vector *n) 492fc86a93bSAlexander Duyck { 49356ca2adfSAlexander Duyck tn_info(n)->rcu.next = tn_info(tn)->rcu.next; 49456ca2adfSAlexander Duyck tn_info(tn)->rcu.next = &tn_info(n)->rcu; 495fc86a93bSAlexander Duyck } 496fc86a93bSAlexander Duyck 49735c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn) 498fc86a93bSAlexander Duyck { 49956ca2adfSAlexander Duyck struct callback_head *head = &tn_info(tn)->rcu; 500fc86a93bSAlexander Duyck 501fc86a93bSAlexander Duyck while (head) { 502fc86a93bSAlexander Duyck head = head->next; 50341b489fdSAlexander Duyck tnode_free_size += TNODE_SIZE(1ul << tn->bits); 50437fd30f2SAlexander Duyck node_free(tn); 505fc86a93bSAlexander Duyck 50656ca2adfSAlexander Duyck tn = container_of(head, struct tnode, rcu)->kv; 507fc86a93bSAlexander Duyck } 508fc86a93bSAlexander Duyck 5099ab948a9SDavid Ahern if (tnode_free_size >= sysctl_fib_sync_mem) { 510fc86a93bSAlexander Duyck tnode_free_size = 0; 511fc86a93bSAlexander Duyck synchronize_rcu(); 512fc86a93bSAlexander Duyck } 5130a5c0475SEric Dumazet } 5140a5c0475SEric Dumazet 51588bae714SAlexander Duyck static struct key_vector *replace(struct trie *t, 51635c6edacSAlexander Duyck struct key_vector *oldtnode, 51735c6edacSAlexander Duyck struct key_vector *tn) 51869fa57b1SAlexander Duyck { 51935c6edacSAlexander Duyck struct key_vector *tp = node_parent(oldtnode); 52069fa57b1SAlexander Duyck unsigned long i; 52169fa57b1SAlexander Duyck 52269fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 52369fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 52488bae714SAlexander Duyck put_child_root(tp, tn->key, tn); 52569fa57b1SAlexander Duyck 52669fa57b1SAlexander Duyck /* update all of the child parent pointers */ 52769fa57b1SAlexander Duyck update_children(tn); 52869fa57b1SAlexander Duyck 52969fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 53069fa57b1SAlexander Duyck tnode_free(oldtnode); 53169fa57b1SAlexander Duyck 53269fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 5332e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 534754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 53569fa57b1SAlexander Duyck 53669fa57b1SAlexander Duyck /* resize child node */ 53769fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 53888bae714SAlexander Duyck tn = resize(t, inode); 53969fa57b1SAlexander Duyck } 5408d8e810cSAlexander Duyck 54188bae714SAlexander Duyck return tp; 54269fa57b1SAlexander Duyck } 54369fa57b1SAlexander Duyck 54488bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t, 54535c6edacSAlexander Duyck struct key_vector *oldtnode) 54619baf839SRobert Olsson { 54735c6edacSAlexander Duyck struct key_vector *tn; 54869fa57b1SAlexander Duyck unsigned long i; 549e9b44019SAlexander Duyck t_key m; 55019baf839SRobert Olsson 5510c7770c7SStephen Hemminger pr_debug("In inflate\n"); 55219baf839SRobert Olsson 553e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5542f80b3c8SRobert Olsson if (!tn) 5558d8e810cSAlexander Duyck goto notnode; 5562f36895aSRobert Olsson 55769fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 55869fa57b1SAlexander Duyck tnode_free_init(oldtnode); 55969fa57b1SAlexander Duyck 56012c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 56112c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 56212c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 56312c081a5SAlexander Duyck * nodes. 5642f36895aSRobert Olsson */ 5652e1ac88aSAlexander Duyck for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { 566754baf8dSAlexander Duyck struct key_vector *inode = get_child(oldtnode, --i); 56735c6edacSAlexander Duyck struct key_vector *node0, *node1; 56869fa57b1SAlexander Duyck unsigned long j, k; 56919baf839SRobert Olsson 57019baf839SRobert Olsson /* An empty child */ 57151456b29SIan Morris if (!inode) 57219baf839SRobert Olsson continue; 57319baf839SRobert Olsson 57419baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 575adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 576e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 57719baf839SRobert Olsson continue; 57819baf839SRobert Olsson } 57919baf839SRobert Olsson 58069fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 58169fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 58269fa57b1SAlexander Duyck 58319baf839SRobert Olsson /* An internal node with two children */ 58419baf839SRobert Olsson if (inode->bits == 1) { 585754baf8dSAlexander Duyck put_child(tn, 2 * i + 1, get_child(inode, 1)); 586754baf8dSAlexander Duyck put_child(tn, 2 * i, get_child(inode, 0)); 58791b9a277SOlof Johansson continue; 58819baf839SRobert Olsson } 58919baf839SRobert Olsson 59019baf839SRobert Olsson /* We will replace this node 'inode' with two new 59112c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 59219baf839SRobert Olsson * original children. The two new nodes will have 59319baf839SRobert Olsson * a position one bit further down the key and this 59419baf839SRobert Olsson * means that the "significant" part of their keys 59519baf839SRobert Olsson * (see the discussion near the top of this file) 59619baf839SRobert Olsson * will differ by one bit, which will be "0" in 59712c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 59819baf839SRobert Olsson * moving the key position by one step, the bit that 59919baf839SRobert Olsson * we are moving away from - the bit at position 60012c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 60112c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 60219baf839SRobert Olsson * two new keys. 60319baf839SRobert Olsson */ 60412c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 60512c081a5SAlexander Duyck if (!node1) 60612c081a5SAlexander Duyck goto nomem; 60769fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 60819baf839SRobert Olsson 60969fa57b1SAlexander Duyck tnode_free_append(tn, node1); 61012c081a5SAlexander Duyck if (!node0) 61112c081a5SAlexander Duyck goto nomem; 61212c081a5SAlexander Duyck tnode_free_append(tn, node0); 6132f36895aSRobert Olsson 61412c081a5SAlexander Duyck /* populate child pointers in new nodes */ 6152e1ac88aSAlexander Duyck for (k = child_length(inode), j = k / 2; j;) { 616754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 617754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 618754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 619754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 62019baf839SRobert Olsson } 621ff181ed8SAlexander Duyck 62212c081a5SAlexander Duyck /* link new nodes to parent */ 62312c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 62412c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 62512c081a5SAlexander Duyck 62612c081a5SAlexander Duyck /* link parent to nodes */ 62712c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 62812c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 62912c081a5SAlexander Duyck } 63012c081a5SAlexander Duyck 63169fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6328d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 633ff181ed8SAlexander Duyck nomem: 634fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 635fc86a93bSAlexander Duyck tnode_free(tn); 6368d8e810cSAlexander Duyck notnode: 6378d8e810cSAlexander Duyck return NULL; 638ff181ed8SAlexander Duyck } 639ff181ed8SAlexander Duyck 64088bae714SAlexander Duyck static struct key_vector *halve(struct trie *t, 64135c6edacSAlexander Duyck struct key_vector *oldtnode) 64219baf839SRobert Olsson { 64335c6edacSAlexander Duyck struct key_vector *tn; 64412c081a5SAlexander Duyck unsigned long i; 64519baf839SRobert Olsson 6460c7770c7SStephen Hemminger pr_debug("In halve\n"); 64719baf839SRobert Olsson 648e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6492f80b3c8SRobert Olsson if (!tn) 6508d8e810cSAlexander Duyck goto notnode; 6512f36895aSRobert Olsson 65269fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 65369fa57b1SAlexander Duyck tnode_free_init(oldtnode); 65469fa57b1SAlexander Duyck 65512c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 65612c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 65712c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 65812c081a5SAlexander Duyck * nodes. 6592f36895aSRobert Olsson */ 6602e1ac88aSAlexander Duyck for (i = child_length(oldtnode); i;) { 661754baf8dSAlexander Duyck struct key_vector *node1 = get_child(oldtnode, --i); 662754baf8dSAlexander Duyck struct key_vector *node0 = get_child(oldtnode, --i); 66335c6edacSAlexander Duyck struct key_vector *inode; 6642f36895aSRobert Olsson 66512c081a5SAlexander Duyck /* At least one of the children is empty */ 66612c081a5SAlexander Duyck if (!node1 || !node0) { 66712c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 66812c081a5SAlexander Duyck continue; 66912c081a5SAlexander Duyck } 6702f36895aSRobert Olsson 6712f36895aSRobert Olsson /* Two nonempty children */ 67212c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 6738d8e810cSAlexander Duyck if (!inode) 6748d8e810cSAlexander Duyck goto nomem; 67512c081a5SAlexander Duyck tnode_free_append(tn, inode); 6762f80b3c8SRobert Olsson 67712c081a5SAlexander Duyck /* initialize pointers out of node */ 67812c081a5SAlexander Duyck put_child(inode, 1, node1); 67912c081a5SAlexander Duyck put_child(inode, 0, node0); 68012c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 68112c081a5SAlexander Duyck 68212c081a5SAlexander Duyck /* link parent to node */ 68312c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6842f36895aSRobert Olsson } 6852f36895aSRobert Olsson 68669fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6878d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 6888d8e810cSAlexander Duyck nomem: 6898d8e810cSAlexander Duyck /* all pointers should be clean so we are done */ 6908d8e810cSAlexander Duyck tnode_free(tn); 6918d8e810cSAlexander Duyck notnode: 6928d8e810cSAlexander Duyck return NULL; 6932f80b3c8SRobert Olsson } 69419baf839SRobert Olsson 69588bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t, 69688bae714SAlexander Duyck struct key_vector *oldtnode) 69795f60ea3SAlexander Duyck { 69835c6edacSAlexander Duyck struct key_vector *n, *tp; 69995f60ea3SAlexander Duyck unsigned long i; 70095f60ea3SAlexander Duyck 70195f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 7022e1ac88aSAlexander Duyck for (n = NULL, i = child_length(oldtnode); !n && i;) 703754baf8dSAlexander Duyck n = get_child(oldtnode, --i); 70495f60ea3SAlexander Duyck 70595f60ea3SAlexander Duyck /* compress one level */ 70695f60ea3SAlexander Duyck tp = node_parent(oldtnode); 70788bae714SAlexander Duyck put_child_root(tp, oldtnode->key, n); 70895f60ea3SAlexander Duyck node_set_parent(n, tp); 70995f60ea3SAlexander Duyck 71095f60ea3SAlexander Duyck /* drop dead node */ 71195f60ea3SAlexander Duyck node_free(oldtnode); 71288bae714SAlexander Duyck 71388bae714SAlexander Duyck return tp; 71495f60ea3SAlexander Duyck } 71595f60ea3SAlexander Duyck 71635c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn) 7175405afd1SAlexander Duyck { 7185405afd1SAlexander Duyck unsigned char slen = tn->pos; 7195405afd1SAlexander Duyck unsigned long stride, i; 720a52ca62cSAlexander Duyck unsigned char slen_max; 721a52ca62cSAlexander Duyck 722a52ca62cSAlexander Duyck /* only vector 0 can have a suffix length greater than or equal to 723a52ca62cSAlexander Duyck * tn->pos + tn->bits, the second highest node will have a suffix 724a52ca62cSAlexander Duyck * length at most of tn->pos + tn->bits - 1 725a52ca62cSAlexander Duyck */ 726a52ca62cSAlexander Duyck slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); 7275405afd1SAlexander Duyck 7285405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 7295405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 7305405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 7315405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 7325405afd1SAlexander Duyck */ 7332e1ac88aSAlexander Duyck for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { 734754baf8dSAlexander Duyck struct key_vector *n = get_child(tn, i); 7355405afd1SAlexander Duyck 7365405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 7375405afd1SAlexander Duyck continue; 7385405afd1SAlexander Duyck 7395405afd1SAlexander Duyck /* update stride and slen based on new value */ 7405405afd1SAlexander Duyck stride <<= (n->slen - slen); 7415405afd1SAlexander Duyck slen = n->slen; 7425405afd1SAlexander Duyck i &= ~(stride - 1); 7435405afd1SAlexander Duyck 744a52ca62cSAlexander Duyck /* stop searching if we have hit the maximum possible value */ 745a52ca62cSAlexander Duyck if (slen >= slen_max) 7465405afd1SAlexander Duyck break; 7475405afd1SAlexander Duyck } 7485405afd1SAlexander Duyck 7495405afd1SAlexander Duyck tn->slen = slen; 7505405afd1SAlexander Duyck 7515405afd1SAlexander Duyck return slen; 7525405afd1SAlexander Duyck } 7535405afd1SAlexander Duyck 754f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 755cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 756cf3637bbSAlexander Duyck * Telecommunications, page 6: 757cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 758cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 759cf3637bbSAlexander Duyck * 760cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 761cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 7622e1ac88aSAlexander Duyck * child_length() and instead of multiplying by 2 (since the 763cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 764cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 765cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 766cf3637bbSAlexander Duyck * 7672e1ac88aSAlexander Duyck * The left-hand side may look a bit weird: child_length(tn) 768cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 769cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 770cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 771cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 772cf3637bbSAlexander Duyck * we just count them one extra time here. 773cf3637bbSAlexander Duyck * 774cf3637bbSAlexander Duyck * A clearer way to write this would be: 775cf3637bbSAlexander Duyck * 776cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 7772e1ac88aSAlexander Duyck * not_to_be_doubled = child_length(tn) - tn->empty_children - 778cf3637bbSAlexander Duyck * tn->full_children; 779cf3637bbSAlexander Duyck * 7802e1ac88aSAlexander Duyck * new_child_length = child_length(tn) * 2; 781cf3637bbSAlexander Duyck * 782cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 783cf3637bbSAlexander Duyck * new_child_length; 784cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 785cf3637bbSAlexander Duyck * 786cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 787cf3637bbSAlexander Duyck * 788cf3637bbSAlexander Duyck * anyway, 789cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 790cf3637bbSAlexander Duyck * inflate_threshold 791cf3637bbSAlexander Duyck * 792cf3637bbSAlexander Duyck * avoid a division: 793cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 794cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 795cf3637bbSAlexander Duyck * 796cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 7972e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 798cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 799cf3637bbSAlexander Duyck * 800cf3637bbSAlexander Duyck * expand new_child_length: 8012e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 802cf3637bbSAlexander Duyck * tn->full_children) >= 8032e1ac88aSAlexander Duyck * inflate_threshold * child_length(tn) * 2 804cf3637bbSAlexander Duyck * 805cf3637bbSAlexander Duyck * shorten again: 8062e1ac88aSAlexander Duyck * 50 * (tn->full_children + child_length(tn) - 807cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 8082e1ac88aSAlexander Duyck * child_length(tn) 809cf3637bbSAlexander Duyck * 810cf3637bbSAlexander Duyck */ 81135c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) 812f05a4819SAlexander Duyck { 8132e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 814f05a4819SAlexander Duyck unsigned long threshold = used; 815cf3637bbSAlexander Duyck 816cf3637bbSAlexander Duyck /* Keep root node larger */ 81788bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; 8186e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 8196e22d174SAlexander Duyck used += tn_info(tn)->full_children; 820cf3637bbSAlexander Duyck 82195f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 82295f60ea3SAlexander Duyck 82395f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 824cf3637bbSAlexander Duyck } 825cf3637bbSAlexander Duyck 82635c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) 827f05a4819SAlexander Duyck { 8282e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 829f05a4819SAlexander Duyck unsigned long threshold = used; 830cf3637bbSAlexander Duyck 831f05a4819SAlexander Duyck /* Keep root node larger */ 83288bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; 8336e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 834f05a4819SAlexander Duyck 83595f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 83695f60ea3SAlexander Duyck 83795f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 83895f60ea3SAlexander Duyck } 83995f60ea3SAlexander Duyck 84035c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn) 84195f60ea3SAlexander Duyck { 8422e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 84395f60ea3SAlexander Duyck 8446e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 84595f60ea3SAlexander Duyck 84695f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 8476e22d174SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) 84895f60ea3SAlexander Duyck used -= KEY_MAX; 84995f60ea3SAlexander Duyck 85095f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 85195f60ea3SAlexander Duyck return used < 2; 852f05a4819SAlexander Duyck } 853f05a4819SAlexander Duyck 854f05a4819SAlexander Duyck #define MAX_WORK 10 85588bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn) 856f05a4819SAlexander Duyck { 8578d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8588d8e810cSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 8598d8e810cSAlexander Duyck #endif 86035c6edacSAlexander Duyck struct key_vector *tp = node_parent(tn); 86188bae714SAlexander Duyck unsigned long cindex = get_index(tn->key, tp); 862a80e89d4SAlexander Duyck int max_work = MAX_WORK; 863f05a4819SAlexander Duyck 864f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 865f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 866f05a4819SAlexander Duyck 867ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 868ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 869ff181ed8SAlexander Duyck * thing without us interfering 870ff181ed8SAlexander Duyck */ 87188bae714SAlexander Duyck BUG_ON(tn != get_child(tp, cindex)); 872ff181ed8SAlexander Duyck 873f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 874f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 875f05a4819SAlexander Duyck */ 876b6f15f82SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 87788bae714SAlexander Duyck tp = inflate(t, tn); 87888bae714SAlexander Duyck if (!tp) { 879cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8808d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 881cf3637bbSAlexander Duyck #endif 882cf3637bbSAlexander Duyck break; 883cf3637bbSAlexander Duyck } 884ff181ed8SAlexander Duyck 885b6f15f82SAlexander Duyck max_work--; 88688bae714SAlexander Duyck tn = get_child(tp, cindex); 887cf3637bbSAlexander Duyck } 888cf3637bbSAlexander Duyck 889b6f15f82SAlexander Duyck /* update parent in case inflate failed */ 890b6f15f82SAlexander Duyck tp = node_parent(tn); 891b6f15f82SAlexander Duyck 892cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 893cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 894b6f15f82SAlexander Duyck return tp; 895cf3637bbSAlexander Duyck 896f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 897cf3637bbSAlexander Duyck * node is above threshold. 898cf3637bbSAlexander Duyck */ 899b6f15f82SAlexander Duyck while (should_halve(tp, tn) && max_work) { 90088bae714SAlexander Duyck tp = halve(t, tn); 90188bae714SAlexander Duyck if (!tp) { 902cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 9038d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 904cf3637bbSAlexander Duyck #endif 905cf3637bbSAlexander Duyck break; 906cf3637bbSAlexander Duyck } 907cf3637bbSAlexander Duyck 908b6f15f82SAlexander Duyck max_work--; 90988bae714SAlexander Duyck tn = get_child(tp, cindex); 910ff181ed8SAlexander Duyck } 911cf3637bbSAlexander Duyck 912cf3637bbSAlexander Duyck /* Only one child remains */ 91388bae714SAlexander Duyck if (should_collapse(tn)) 91488bae714SAlexander Duyck return collapse(t, tn); 91588bae714SAlexander Duyck 916b6f15f82SAlexander Duyck /* update parent in case halve failed */ 917a52ca62cSAlexander Duyck return node_parent(tn); 918cf3637bbSAlexander Duyck } 9198d8e810cSAlexander Duyck 9201a239173SAlexander Duyck static void node_pull_suffix(struct key_vector *tn, unsigned char slen) 92119baf839SRobert Olsson { 9221a239173SAlexander Duyck unsigned char node_slen = tn->slen; 9231a239173SAlexander Duyck 9241a239173SAlexander Duyck while ((node_slen > tn->pos) && (node_slen > slen)) { 9251a239173SAlexander Duyck slen = update_suffix(tn); 9261a239173SAlexander Duyck if (node_slen == slen) 9275405afd1SAlexander Duyck break; 9281a239173SAlexander Duyck 9291a239173SAlexander Duyck tn = node_parent(tn); 9301a239173SAlexander Duyck node_slen = tn->slen; 9315405afd1SAlexander Duyck } 9325405afd1SAlexander Duyck } 9335405afd1SAlexander Duyck 9341a239173SAlexander Duyck static void node_push_suffix(struct key_vector *tn, unsigned char slen) 9355405afd1SAlexander Duyck { 9361a239173SAlexander Duyck while (tn->slen < slen) { 9371a239173SAlexander Duyck tn->slen = slen; 9385405afd1SAlexander Duyck tn = node_parent(tn); 9395405afd1SAlexander Duyck } 9405405afd1SAlexander Duyck } 9415405afd1SAlexander Duyck 9422373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 94335c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t, 94435c6edacSAlexander Duyck struct key_vector **tp, u32 key) 94519baf839SRobert Olsson { 94688bae714SAlexander Duyck struct key_vector *pn, *n = t->kv; 94788bae714SAlexander Duyck unsigned long index = 0; 94819baf839SRobert Olsson 94988bae714SAlexander Duyck do { 95088bae714SAlexander Duyck pn = n; 95188bae714SAlexander Duyck n = get_child_rcu(n, index); 95288bae714SAlexander Duyck 95388bae714SAlexander Duyck if (!n) 95488bae714SAlexander Duyck break; 95588bae714SAlexander Duyck 95688bae714SAlexander Duyck index = get_cindex(key, n); 95719baf839SRobert Olsson 958939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 959939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 960939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 961939afb06SAlexander Duyck * is the difference between the key and this value. From 962939afb06SAlexander Duyck * this we can actually derive several pieces of data. 963d4a975e8SAlexander Duyck * if (index >= (1ul << bits)) 964939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 965b3832117SAlexander Duyck * else 966b3832117SAlexander Duyck * we know the value is cindex 967d4a975e8SAlexander Duyck * 968d4a975e8SAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 969d4a975e8SAlexander Duyck * fact that we can only allocate a node with 32 bits if a 970d4a975e8SAlexander Duyck * long is greater than 32 bits. 971939afb06SAlexander Duyck */ 972d4a975e8SAlexander Duyck if (index >= (1ul << n->bits)) { 973d4a975e8SAlexander Duyck n = NULL; 974d4a975e8SAlexander Duyck break; 975d4a975e8SAlexander Duyck } 976939afb06SAlexander Duyck 97788bae714SAlexander Duyck /* keep searching until we find a perfect match leaf or NULL */ 97888bae714SAlexander Duyck } while (IS_TNODE(n)); 979939afb06SAlexander Duyck 98035c6edacSAlexander Duyck *tp = pn; 981d4a975e8SAlexander Duyck 982939afb06SAlexander Duyck return n; 98319baf839SRobert Olsson } 98419baf839SRobert Olsson 98502525368SAlexander Duyck /* Return the first fib alias matching TOS with 98602525368SAlexander Duyck * priority less than or equal to PRIO. 98702525368SAlexander Duyck */ 98879e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, 9890b65bd97SAlexander Duyck u8 tos, u32 prio, u32 tb_id) 99002525368SAlexander Duyck { 99102525368SAlexander Duyck struct fib_alias *fa; 99202525368SAlexander Duyck 99302525368SAlexander Duyck if (!fah) 99402525368SAlexander Duyck return NULL; 99502525368SAlexander Duyck 99656315f9eSAlexander Duyck hlist_for_each_entry(fa, fah, fa_list) { 99779e5ad2cSAlexander Duyck if (fa->fa_slen < slen) 99879e5ad2cSAlexander Duyck continue; 99979e5ad2cSAlexander Duyck if (fa->fa_slen != slen) 100079e5ad2cSAlexander Duyck break; 10010b65bd97SAlexander Duyck if (fa->tb_id > tb_id) 10020b65bd97SAlexander Duyck continue; 10030b65bd97SAlexander Duyck if (fa->tb_id != tb_id) 10040b65bd97SAlexander Duyck break; 100502525368SAlexander Duyck if (fa->fa_tos > tos) 100602525368SAlexander Duyck continue; 100702525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 100802525368SAlexander Duyck return fa; 100902525368SAlexander Duyck } 101002525368SAlexander Duyck 101102525368SAlexander Duyck return NULL; 101202525368SAlexander Duyck } 101302525368SAlexander Duyck 101435c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn) 101519baf839SRobert Olsson { 101688bae714SAlexander Duyck while (!IS_TRIE(tn)) 101788bae714SAlexander Duyck tn = resize(t, tn); 101819baf839SRobert Olsson } 101919baf839SRobert Olsson 102035c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp, 1021d5d6487cSAlexander Duyck struct fib_alias *new, t_key key) 102219baf839SRobert Olsson { 102335c6edacSAlexander Duyck struct key_vector *n, *l; 1024836a0123SAlexander Duyck 1025d5d6487cSAlexander Duyck l = leaf_new(key, new); 102679e5ad2cSAlexander Duyck if (!l) 10278d8e810cSAlexander Duyck goto noleaf; 1028d5d6487cSAlexander Duyck 1029d5d6487cSAlexander Duyck /* retrieve child from parent node */ 1030754baf8dSAlexander Duyck n = get_child(tp, get_index(key, tp)); 103119baf839SRobert Olsson 1032836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1033836a0123SAlexander Duyck * 103419baf839SRobert Olsson * Add a new tnode here 103519baf839SRobert Olsson * first tnode need some special handling 1036836a0123SAlexander Duyck * leaves us in position for handling as case 3 103719baf839SRobert Olsson */ 103819baf839SRobert Olsson if (n) { 103935c6edacSAlexander Duyck struct key_vector *tn; 1040f835e471SRobert Olsson 1041e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 10428d8e810cSAlexander Duyck if (!tn) 10438d8e810cSAlexander Duyck goto notnode; 104419baf839SRobert Olsson 1045836a0123SAlexander Duyck /* initialize routes out of node */ 1046836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1047836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 104819baf839SRobert Olsson 1049836a0123SAlexander Duyck /* start adding routes into the node */ 105088bae714SAlexander Duyck put_child_root(tp, key, tn); 1051836a0123SAlexander Duyck node_set_parent(n, tn); 105219baf839SRobert Olsson 1053836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1054e962f302SAlexander Duyck tp = tn; 105519baf839SRobert Olsson } 105691b9a277SOlof Johansson 1057836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1058a52ca62cSAlexander Duyck node_push_suffix(tp, new->fa_slen); 1059836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 106088bae714SAlexander Duyck put_child_root(tp, key, l); 10617b85576dSJarek Poplawski trie_rebalance(t, tp); 1062d5d6487cSAlexander Duyck 1063d5d6487cSAlexander Duyck return 0; 10648d8e810cSAlexander Duyck notnode: 10658d8e810cSAlexander Duyck node_free(l); 10668d8e810cSAlexander Duyck noleaf: 10678d8e810cSAlexander Duyck return -ENOMEM; 1068d5d6487cSAlexander Duyck } 1069d5d6487cSAlexander Duyck 10706635f311SDavid Ahern /* fib notifier for ADD is sent before calling fib_insert_alias with 10716635f311SDavid Ahern * the expectation that the only possible failure ENOMEM 10726635f311SDavid Ahern */ 107335c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp, 107435c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *new, 1075d5d6487cSAlexander Duyck struct fib_alias *fa, t_key key) 1076d5d6487cSAlexander Duyck { 1077d5d6487cSAlexander Duyck if (!l) 1078d5d6487cSAlexander Duyck return fib_insert_node(t, tp, new, key); 1079d5d6487cSAlexander Duyck 1080d5d6487cSAlexander Duyck if (fa) { 1081d5d6487cSAlexander Duyck hlist_add_before_rcu(&new->fa_list, &fa->fa_list); 1082836a0123SAlexander Duyck } else { 1083d5d6487cSAlexander Duyck struct fib_alias *last; 1084d5d6487cSAlexander Duyck 1085d5d6487cSAlexander Duyck hlist_for_each_entry(last, &l->leaf, fa_list) { 1086d5d6487cSAlexander Duyck if (new->fa_slen < last->fa_slen) 1087d5d6487cSAlexander Duyck break; 10880b65bd97SAlexander Duyck if ((new->fa_slen == last->fa_slen) && 10890b65bd97SAlexander Duyck (new->tb_id > last->tb_id)) 10900b65bd97SAlexander Duyck break; 1091d5d6487cSAlexander Duyck fa = last; 1092836a0123SAlexander Duyck } 1093836a0123SAlexander Duyck 1094d5d6487cSAlexander Duyck if (fa) 1095d5d6487cSAlexander Duyck hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); 1096d5d6487cSAlexander Duyck else 1097d5d6487cSAlexander Duyck hlist_add_head_rcu(&new->fa_list, &l->leaf); 109819baf839SRobert Olsson } 109919baf839SRobert Olsson 1100d5d6487cSAlexander Duyck /* if we added to the tail node then we need to update slen */ 1101d5d6487cSAlexander Duyck if (l->slen < new->fa_slen) { 1102d5d6487cSAlexander Duyck l->slen = new->fa_slen; 11031a239173SAlexander Duyck node_push_suffix(tp, new->fa_slen); 1104d5d6487cSAlexander Duyck } 1105d5d6487cSAlexander Duyck 1106d5d6487cSAlexander Duyck return 0; 1107d5d6487cSAlexander Duyck } 1108d5d6487cSAlexander Duyck 110978055998SDavid Ahern static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) 1110ba277e8eSDavid Ahern { 111178055998SDavid Ahern if (plen > KEYLENGTH) { 111278055998SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid prefix length"); 1113ba277e8eSDavid Ahern return false; 111478055998SDavid Ahern } 1115ba277e8eSDavid Ahern 111678055998SDavid Ahern if ((plen < KEYLENGTH) && (key << plen)) { 111778055998SDavid Ahern NL_SET_ERR_MSG(extack, 111878055998SDavid Ahern "Invalid prefix for given prefix length"); 1119ba277e8eSDavid Ahern return false; 112078055998SDavid Ahern } 1121ba277e8eSDavid Ahern 1122ba277e8eSDavid Ahern return true; 1123ba277e8eSDavid Ahern } 1124ba277e8eSDavid Ahern 1125d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1126b90eb754SJiri Pirko int fib_table_insert(struct net *net, struct fib_table *tb, 11276d8422a1SDavid Ahern struct fib_config *cfg, struct netlink_ext_ack *extack) 112819baf839SRobert Olsson { 11292f3a5272SIdo Schimmel enum fib_event_type event = FIB_EVENT_ENTRY_ADD; 113019baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 113119baf839SRobert Olsson struct fib_alias *fa, *new_fa; 113235c6edacSAlexander Duyck struct key_vector *l, *tp; 1133b93e1fa7SGuillaume Nault u16 nlflags = NLM_F_EXCL; 113419baf839SRobert Olsson struct fib_info *fi; 113579e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 113679e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 11374e902c57SThomas Graf u8 tos = cfg->fc_tos; 1138d4a975e8SAlexander Duyck u32 key; 113919baf839SRobert Olsson int err; 114019baf839SRobert Olsson 11414e902c57SThomas Graf key = ntohl(cfg->fc_dst); 114219baf839SRobert Olsson 114378055998SDavid Ahern if (!fib_valid_key_len(key, plen, extack)) 114419baf839SRobert Olsson return -EINVAL; 114519baf839SRobert Olsson 1146ba277e8eSDavid Ahern pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 1147ba277e8eSDavid Ahern 11486d8422a1SDavid Ahern fi = fib_create_info(cfg, extack); 11494e902c57SThomas Graf if (IS_ERR(fi)) { 11504e902c57SThomas Graf err = PTR_ERR(fi); 115119baf839SRobert Olsson goto err; 11524e902c57SThomas Graf } 115319baf839SRobert Olsson 1154d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 11550b65bd97SAlexander Duyck fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, 11560b65bd97SAlexander Duyck tb->tb_id) : NULL; 115719baf839SRobert Olsson 115819baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 115919baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 116019baf839SRobert Olsson * exists or to the node before which we will insert new one. 116119baf839SRobert Olsson * 116219baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 116356315f9eSAlexander Duyck * insert to the tail of the section matching the suffix length 116456315f9eSAlexander Duyck * of the new alias. 116519baf839SRobert Olsson */ 116619baf839SRobert Olsson 1167936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1168936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1169936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 117019baf839SRobert Olsson 117119baf839SRobert Olsson err = -EEXIST; 11724e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 117319baf839SRobert Olsson goto out; 117419baf839SRobert Olsson 1175b93e1fa7SGuillaume Nault nlflags &= ~NLM_F_EXCL; 1176b93e1fa7SGuillaume Nault 1177936f6f8eSJulian Anastasov /* We have 2 goals: 1178936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1179936f6f8eSJulian Anastasov * duplicate routes 1180936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1181936f6f8eSJulian Anastasov */ 1182936f6f8eSJulian Anastasov fa_match = NULL; 1183936f6f8eSJulian Anastasov fa_first = fa; 118456315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 11850b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 11860b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 11870b65bd97SAlexander Duyck (fa->fa_tos != tos)) 1188936f6f8eSJulian Anastasov break; 1189936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1190936f6f8eSJulian Anastasov break; 1191936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1192936f6f8eSJulian Anastasov fa->fa_info == fi) { 1193936f6f8eSJulian Anastasov fa_match = fa; 1194936f6f8eSJulian Anastasov break; 1195936f6f8eSJulian Anastasov } 1196936f6f8eSJulian Anastasov } 1197936f6f8eSJulian Anastasov 11984e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 119919baf839SRobert Olsson struct fib_info *fi_drop; 120019baf839SRobert Olsson u8 state; 120119baf839SRobert Olsson 1202b93e1fa7SGuillaume Nault nlflags |= NLM_F_REPLACE; 1203936f6f8eSJulian Anastasov fa = fa_first; 1204936f6f8eSJulian Anastasov if (fa_match) { 1205936f6f8eSJulian Anastasov if (fa == fa_match) 1206936f6f8eSJulian Anastasov err = 0; 12076725033fSJoonwoo Park goto out; 1208936f6f8eSJulian Anastasov } 12092373ce1cSRobert Olsson err = -ENOBUFS; 1210e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 121151456b29SIan Morris if (!new_fa) 12122373ce1cSRobert Olsson goto out; 121319baf839SRobert Olsson 121419baf839SRobert Olsson fi_drop = fa->fa_info; 12152373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12162373ce1cSRobert Olsson new_fa->fa_info = fi; 12174e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 121819baf839SRobert Olsson state = fa->fa_state; 1219936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 12209b6ebad5SAlexander Duyck new_fa->fa_slen = fa->fa_slen; 1221d4e64c29SMichal Kubeček new_fa->tb_id = tb->tb_id; 12222392debcSJulian Anastasov new_fa->fa_default = -1; 122319baf839SRobert Olsson 1224c1d7ee67SDavid Ahern err = call_fib_entry_notifiers(net, 1225c1d7ee67SDavid Ahern FIB_EVENT_ENTRY_REPLACE, 1226c1d7ee67SDavid Ahern key, plen, new_fa, 1227c1d7ee67SDavid Ahern extack); 1228c1d7ee67SDavid Ahern if (err) 1229c1d7ee67SDavid Ahern goto out_free_new_fa; 1230c1d7ee67SDavid Ahern 12315b7d616dSIdo Schimmel rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 12325b7d616dSIdo Schimmel tb->tb_id, &cfg->fc_nlinfo, nlflags); 12335b7d616dSIdo Schimmel 123456315f9eSAlexander Duyck hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12358e05fd71SScott Feldman 12362373ce1cSRobert Olsson alias_free_mem_rcu(fa); 123719baf839SRobert Olsson 123819baf839SRobert Olsson fib_release_info(fi_drop); 123919baf839SRobert Olsson if (state & FA_S_ACCESSED) 12404ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1241b90eb754SJiri Pirko 124219baf839SRobert Olsson goto succeeded; 124319baf839SRobert Olsson } 124419baf839SRobert Olsson /* Error if we find a perfect match which 124519baf839SRobert Olsson * uses the same scope, type, and nexthop 124619baf839SRobert Olsson * information. 124719baf839SRobert Olsson */ 1248936f6f8eSJulian Anastasov if (fa_match) 124919baf839SRobert Olsson goto out; 1250a07f5f50SStephen Hemminger 12512f3a5272SIdo Schimmel if (cfg->fc_nlflags & NLM_F_APPEND) { 12522f3a5272SIdo Schimmel event = FIB_EVENT_ENTRY_APPEND; 1253b93e1fa7SGuillaume Nault nlflags |= NLM_F_APPEND; 12542f3a5272SIdo Schimmel } else { 1255936f6f8eSJulian Anastasov fa = fa_first; 125619baf839SRobert Olsson } 12572f3a5272SIdo Schimmel } 125819baf839SRobert Olsson err = -ENOENT; 12594e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 126019baf839SRobert Olsson goto out; 126119baf839SRobert Olsson 1262b93e1fa7SGuillaume Nault nlflags |= NLM_F_CREATE; 126319baf839SRobert Olsson err = -ENOBUFS; 1264e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 126551456b29SIan Morris if (!new_fa) 126619baf839SRobert Olsson goto out; 126719baf839SRobert Olsson 126819baf839SRobert Olsson new_fa->fa_info = fi; 126919baf839SRobert Olsson new_fa->fa_tos = tos; 12704e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 127119baf839SRobert Olsson new_fa->fa_state = 0; 127279e5ad2cSAlexander Duyck new_fa->fa_slen = slen; 12730ddcf43dSAlexander Duyck new_fa->tb_id = tb->tb_id; 12742392debcSJulian Anastasov new_fa->fa_default = -1; 127519baf839SRobert Olsson 12766635f311SDavid Ahern err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); 12776635f311SDavid Ahern if (err) 12786635f311SDavid Ahern goto out_free_new_fa; 12796635f311SDavid Ahern 12809b6ebad5SAlexander Duyck /* Insert new entry to the list. */ 1281d5d6487cSAlexander Duyck err = fib_insert_alias(t, tp, l, new_fa, fa, key); 1282d5d6487cSAlexander Duyck if (err) 12836635f311SDavid Ahern goto out_fib_notif; 128419baf839SRobert Olsson 128521d8c49eSDavid S. Miller if (!plen) 128621d8c49eSDavid S. Miller tb->tb_num_default++; 128721d8c49eSDavid S. Miller 12884ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 12890ddcf43dSAlexander Duyck rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1290a2bb6d7dSRoopa Prabhu &cfg->fc_nlinfo, nlflags); 129119baf839SRobert Olsson succeeded: 129219baf839SRobert Olsson return 0; 1293f835e471SRobert Olsson 12946635f311SDavid Ahern out_fib_notif: 12956635f311SDavid Ahern /* notifier was sent that entry would be added to trie, but 12966635f311SDavid Ahern * the add failed and need to recover. Only failure for 12976635f311SDavid Ahern * fib_insert_alias is ENOMEM. 12986635f311SDavid Ahern */ 12996635f311SDavid Ahern NL_SET_ERR_MSG(extack, "Failed to insert route into trie"); 13006635f311SDavid Ahern call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, 13016635f311SDavid Ahern plen, new_fa, NULL); 1302f835e471SRobert Olsson out_free_new_fa: 1303f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 130419baf839SRobert Olsson out: 130519baf839SRobert Olsson fib_release_info(fi); 130691b9a277SOlof Johansson err: 130719baf839SRobert Olsson return err; 130819baf839SRobert Olsson } 130919baf839SRobert Olsson 131035c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n) 13119f9e636dSAlexander Duyck { 13129f9e636dSAlexander Duyck t_key prefix = n->key; 13139f9e636dSAlexander Duyck 13149f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 13159f9e636dSAlexander Duyck } 13169f9e636dSAlexander Duyck 1317345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 131822bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1319ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 132019baf839SRobert Olsson { 132119baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 13228274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 13238274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 13248274a97aSAlexander Duyck #endif 13259f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 132635c6edacSAlexander Duyck struct key_vector *n, *pn; 132779e5ad2cSAlexander Duyck struct fib_alias *fa; 132871e8b67dSAlexander Duyck unsigned long index; 13299f9e636dSAlexander Duyck t_key cindex; 133019baf839SRobert Olsson 133188bae714SAlexander Duyck pn = t->kv; 133288bae714SAlexander Duyck cindex = 0; 133388bae714SAlexander Duyck 133488bae714SAlexander Duyck n = get_child_rcu(pn, cindex); 13359f323973SDavid Ahern if (!n) { 13369f323973SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN); 1337345e9b54SAlexander Duyck return -EAGAIN; 13389f323973SDavid Ahern } 133919baf839SRobert Olsson 134019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13418274a97aSAlexander Duyck this_cpu_inc(stats->gets); 134219baf839SRobert Olsson #endif 134319baf839SRobert Olsson 13449f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 13459f9e636dSAlexander Duyck for (;;) { 134688bae714SAlexander Duyck index = get_cindex(key, n); 13479f9e636dSAlexander Duyck 13489f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13499f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13509f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13519f9e636dSAlexander Duyck * is the difference between the key and this value. From 13529f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 135371e8b67dSAlexander Duyck * if (index >= (1ul << bits)) 13549f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1355b3832117SAlexander Duyck * else 1356b3832117SAlexander Duyck * we know the value is cindex 135771e8b67dSAlexander Duyck * 135871e8b67dSAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 135971e8b67dSAlexander Duyck * fact that we can only allocate a node with 32 bits if a 136071e8b67dSAlexander Duyck * long is greater than 32 bits. 13619f9e636dSAlexander Duyck */ 136271e8b67dSAlexander Duyck if (index >= (1ul << n->bits)) 13639f9e636dSAlexander Duyck break; 13649f9e636dSAlexander Duyck 13659f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13669f9e636dSAlexander Duyck if (IS_LEAF(n)) 1367a07f5f50SStephen Hemminger goto found; 13689f9e636dSAlexander Duyck 13699f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13709f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13719f9e636dSAlexander Duyck */ 13725405afd1SAlexander Duyck if (n->slen > n->pos) { 13739f9e636dSAlexander Duyck pn = n; 13749f9e636dSAlexander Duyck cindex = index; 137519baf839SRobert Olsson } 1376a07f5f50SStephen Hemminger 1377754baf8dSAlexander Duyck n = get_child_rcu(n, index); 13789f9e636dSAlexander Duyck if (unlikely(!n)) 13799f9e636dSAlexander Duyck goto backtrace; 13809f9e636dSAlexander Duyck } 138119baf839SRobert Olsson 13829f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13839f9e636dSAlexander Duyck for (;;) { 13849f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 138535c6edacSAlexander Duyck struct key_vector __rcu **cptr = n->tnode; 138619baf839SRobert Olsson 13879f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13889f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13899f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13909f9e636dSAlexander Duyck */ 13915405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13929f9e636dSAlexander Duyck goto backtrace; 139319baf839SRobert Olsson 13949f9e636dSAlexander Duyck /* exit out and process leaf */ 13959f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13969f9e636dSAlexander Duyck break; 139719baf839SRobert Olsson 13989f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13999f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 14009f9e636dSAlexander Duyck * we started this traversal anyway 14019f9e636dSAlexander Duyck */ 14029f9e636dSAlexander Duyck 14039f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 14049f9e636dSAlexander Duyck backtrace: 140519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14069f9e636dSAlexander Duyck if (!n) 14078274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 140819baf839SRobert Olsson #endif 14099f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 14109f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 14119f9e636dSAlexander Duyck * up one level to see if there are any more bits to 14129f9e636dSAlexander Duyck * be stripped there. 141319baf839SRobert Olsson */ 14149f9e636dSAlexander Duyck while (!cindex) { 14159f9e636dSAlexander Duyck t_key pkey = pn->key; 141619baf839SRobert Olsson 141788bae714SAlexander Duyck /* If we don't have a parent then there is 141888bae714SAlexander Duyck * nothing for us to do as we do not have any 141988bae714SAlexander Duyck * further nodes to parse. 142088bae714SAlexander Duyck */ 14219f323973SDavid Ahern if (IS_TRIE(pn)) { 14229f323973SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp, 14239f323973SDavid Ahern NULL, -EAGAIN); 1424345e9b54SAlexander Duyck return -EAGAIN; 14259f323973SDavid Ahern } 142619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14278274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 142819baf839SRobert Olsson #endif 14299f9e636dSAlexander Duyck /* Get Child's index */ 143088bae714SAlexander Duyck pn = node_parent_rcu(pn); 14319f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 14329f9e636dSAlexander Duyck } 14339f9e636dSAlexander Duyck 14349f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 14359f9e636dSAlexander Duyck cindex &= cindex - 1; 14369f9e636dSAlexander Duyck 14379f9e636dSAlexander Duyck /* grab pointer for next child node */ 143841b489fdSAlexander Duyck cptr = &pn->tnode[cindex]; 143919baf839SRobert Olsson } 144019baf839SRobert Olsson } 14419f9e636dSAlexander Duyck 144219baf839SRobert Olsson found: 144371e8b67dSAlexander Duyck /* this line carries forward the xor from earlier in the function */ 144471e8b67dSAlexander Duyck index = key ^ n->key; 144571e8b67dSAlexander Duyck 14469f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 144779e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 1448345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1449345e9b54SAlexander Duyck int nhsel, err; 1450345e9b54SAlexander Duyck 1451a5829f53SAlexander Duyck if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { 1452a5829f53SAlexander Duyck if (index >= (1ul << fa->fa_slen)) 14539b6ebad5SAlexander Duyck continue; 1454a5829f53SAlexander Duyck } 1455345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1456345e9b54SAlexander Duyck continue; 1457345e9b54SAlexander Duyck if (fi->fib_dead) 1458345e9b54SAlexander Duyck continue; 1459345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1460345e9b54SAlexander Duyck continue; 1461345e9b54SAlexander Duyck fib_alias_accessed(fa); 1462345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1463345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1464345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1465345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1466345e9b54SAlexander Duyck #endif 14679f323973SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp, NULL, err); 1468345e9b54SAlexander Duyck return err; 1469345e9b54SAlexander Duyck } 1470345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1471345e9b54SAlexander Duyck continue; 14725481d73fSDavid Ahern for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { 1473eba618abSDavid Ahern struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); 1474345e9b54SAlexander Duyck 1475eba618abSDavid Ahern if (nhc->nhc_flags & RTNH_F_DEAD) 1476345e9b54SAlexander Duyck continue; 1477eba618abSDavid Ahern if (ip_ignore_linkdown(nhc->nhc_dev) && 1478eba618abSDavid Ahern nhc->nhc_flags & RTNH_F_LINKDOWN && 14790eeb075fSAndy Gospodarek !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 14800eeb075fSAndy Gospodarek continue; 148158189ca7SDavid Ahern if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { 1482613d09b3SDavid Ahern if (flp->flowi4_oif && 1483eba618abSDavid Ahern flp->flowi4_oif != nhc->nhc_oif) 1484345e9b54SAlexander Duyck continue; 1485613d09b3SDavid Ahern } 1486345e9b54SAlexander Duyck 1487345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 14880029c0deSReshetova, Elena refcount_inc(&fi->fib_clntref); 1489345e9b54SAlexander Duyck 14906ffd9034SDavid Ahern res->prefix = htonl(n->key); 14919b6ebad5SAlexander Duyck res->prefixlen = KEYLENGTH - fa->fa_slen; 1492345e9b54SAlexander Duyck res->nh_sel = nhsel; 1493eba618abSDavid Ahern res->nhc = nhc; 1494345e9b54SAlexander Duyck res->type = fa->fa_type; 1495345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1496345e9b54SAlexander Duyck res->fi = fi; 1497345e9b54SAlexander Duyck res->table = tb; 149879e5ad2cSAlexander Duyck res->fa_head = &n->leaf; 1499345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1500345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1501345e9b54SAlexander Duyck #endif 1502eba618abSDavid Ahern trace_fib_table_lookup(tb->tb_id, flp, nhc, err); 1503f6d3c192SDavid Ahern 1504345e9b54SAlexander Duyck return err; 1505345e9b54SAlexander Duyck } 1506345e9b54SAlexander Duyck } 1507345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1508345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1509345e9b54SAlexander Duyck #endif 15109f9e636dSAlexander Duyck goto backtrace; 151119baf839SRobert Olsson } 15126fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 151319baf839SRobert Olsson 151435c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp, 151535c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *old) 1516d5d6487cSAlexander Duyck { 1517d5d6487cSAlexander Duyck /* record the location of the previous list_info entry */ 1518d5d6487cSAlexander Duyck struct hlist_node **pprev = old->fa_list.pprev; 1519d5d6487cSAlexander Duyck struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); 1520d5d6487cSAlexander Duyck 1521d5d6487cSAlexander Duyck /* remove the fib_alias from the list */ 1522d5d6487cSAlexander Duyck hlist_del_rcu(&old->fa_list); 1523d5d6487cSAlexander Duyck 1524d5d6487cSAlexander Duyck /* if we emptied the list this leaf will be freed and we can sort 1525d5d6487cSAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 1526d562f1f8SRobert Olsson */ 1527d5d6487cSAlexander Duyck if (hlist_empty(&l->leaf)) { 1528a52ca62cSAlexander Duyck if (tp->slen == l->slen) 1529a52ca62cSAlexander Duyck node_pull_suffix(tp, tp->pos); 153088bae714SAlexander Duyck put_child_root(tp, l->key, NULL); 1531d5d6487cSAlexander Duyck node_free(l); 1532d5d6487cSAlexander Duyck trie_rebalance(t, tp); 1533d5d6487cSAlexander Duyck return; 1534d5d6487cSAlexander Duyck } 1535d5d6487cSAlexander Duyck 1536d5d6487cSAlexander Duyck /* only access fa if it is pointing at the last valid hlist_node */ 1537d5d6487cSAlexander Duyck if (*pprev) 1538d5d6487cSAlexander Duyck return; 1539d5d6487cSAlexander Duyck 1540d5d6487cSAlexander Duyck /* update the trie with the latest suffix length */ 1541d5d6487cSAlexander Duyck l->slen = fa->fa_slen; 15421a239173SAlexander Duyck node_pull_suffix(tp, fa->fa_slen); 1543d5d6487cSAlexander Duyck } 1544d5d6487cSAlexander Duyck 1545d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1546b90eb754SJiri Pirko int fib_table_delete(struct net *net, struct fib_table *tb, 154778055998SDavid Ahern struct fib_config *cfg, struct netlink_ext_ack *extack) 154819baf839SRobert Olsson { 154919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 155019baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 155135c6edacSAlexander Duyck struct key_vector *l, *tp; 155279e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 155379e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 1554d4a975e8SAlexander Duyck u8 tos = cfg->fc_tos; 1555d4a975e8SAlexander Duyck u32 key; 155691b9a277SOlof Johansson 15574e902c57SThomas Graf key = ntohl(cfg->fc_dst); 155819baf839SRobert Olsson 155978055998SDavid Ahern if (!fib_valid_key_len(key, plen, extack)) 156019baf839SRobert Olsson return -EINVAL; 156119baf839SRobert Olsson 1562d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 156319baf839SRobert Olsson if (!l) 156419baf839SRobert Olsson return -ESRCH; 156519baf839SRobert Olsson 15660b65bd97SAlexander Duyck fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); 156719baf839SRobert Olsson if (!fa) 156819baf839SRobert Olsson return -ESRCH; 156919baf839SRobert Olsson 15700c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 157119baf839SRobert Olsson 157219baf839SRobert Olsson fa_to_delete = NULL; 157356315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 157419baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 157519baf839SRobert Olsson 15760b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 15770b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 15780b65bd97SAlexander Duyck (fa->fa_tos != tos)) 157919baf839SRobert Olsson break; 158019baf839SRobert Olsson 15814e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15824e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 158337e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 158474cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 158574cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15864e902c57SThomas Graf (!cfg->fc_protocol || 15874e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15885f9ae3d9SXin Long fib_nh_match(cfg, fi, extack) == 0 && 15895f9ae3d9SXin Long fib_metrics_match(cfg, fi)) { 159019baf839SRobert Olsson fa_to_delete = fa; 159119baf839SRobert Olsson break; 159219baf839SRobert Olsson } 159319baf839SRobert Olsson } 159419baf839SRobert Olsson 159591b9a277SOlof Johansson if (!fa_to_delete) 159691b9a277SOlof Johansson return -ESRCH; 159719baf839SRobert Olsson 1598b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, 15996c31e5a9SDavid Ahern fa_to_delete, extack); 1600d5d6487cSAlexander Duyck rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1601b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 160219baf839SRobert Olsson 160321d8c49eSDavid S. Miller if (!plen) 160421d8c49eSDavid S. Miller tb->tb_num_default--; 160521d8c49eSDavid S. Miller 1606d5d6487cSAlexander Duyck fib_remove_alias(t, tp, l, fa_to_delete); 16077289e6ddSAlexander Duyck 1608d5d6487cSAlexander Duyck if (fa_to_delete->fa_state & FA_S_ACCESSED) 16094ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 161019baf839SRobert Olsson 1611d5d6487cSAlexander Duyck fib_release_info(fa_to_delete->fa_info); 1612d5d6487cSAlexander Duyck alias_free_mem_rcu(fa_to_delete); 161319baf839SRobert Olsson return 0; 161419baf839SRobert Olsson } 161519baf839SRobert Olsson 16168be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */ 161735c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) 161819baf839SRobert Olsson { 161935c6edacSAlexander Duyck struct key_vector *pn, *n = *tn; 16208be33e95SAlexander Duyck unsigned long cindex; 162119baf839SRobert Olsson 16228be33e95SAlexander Duyck /* this loop is meant to try and find the key in the trie */ 162388bae714SAlexander Duyck do { 162488bae714SAlexander Duyck /* record parent and next child index */ 162588bae714SAlexander Duyck pn = n; 1626c2229fe1SAlexander Duyck cindex = (key > pn->key) ? get_index(key, pn) : 0; 162788bae714SAlexander Duyck 162888bae714SAlexander Duyck if (cindex >> pn->bits) 162988bae714SAlexander Duyck break; 163088bae714SAlexander Duyck 163188bae714SAlexander Duyck /* descend into the next child */ 163288bae714SAlexander Duyck n = get_child_rcu(pn, cindex++); 163388bae714SAlexander Duyck if (!n) 163488bae714SAlexander Duyck break; 16358be33e95SAlexander Duyck 16368be33e95SAlexander Duyck /* guarantee forward progress on the keys */ 16378be33e95SAlexander Duyck if (IS_LEAF(n) && (n->key >= key)) 16388be33e95SAlexander Duyck goto found; 163988bae714SAlexander Duyck } while (IS_TNODE(n)); 16408be33e95SAlexander Duyck 16418be33e95SAlexander Duyck /* this loop will search for the next leaf with a greater key */ 164288bae714SAlexander Duyck while (!IS_TRIE(pn)) { 16438be33e95SAlexander Duyck /* if we exhausted the parent node we will need to climb */ 16448be33e95SAlexander Duyck if (cindex >= (1ul << pn->bits)) { 16458be33e95SAlexander Duyck t_key pkey = pn->key; 16468be33e95SAlexander Duyck 16478be33e95SAlexander Duyck pn = node_parent_rcu(pn); 16488be33e95SAlexander Duyck cindex = get_index(pkey, pn) + 1; 16498be33e95SAlexander Duyck continue; 16508be33e95SAlexander Duyck } 16518be33e95SAlexander Duyck 16528be33e95SAlexander Duyck /* grab the next available node */ 1653754baf8dSAlexander Duyck n = get_child_rcu(pn, cindex++); 16548be33e95SAlexander Duyck if (!n) 165591b9a277SOlof Johansson continue; 165619baf839SRobert Olsson 16578be33e95SAlexander Duyck /* no need to compare keys since we bumped the index */ 16588be33e95SAlexander Duyck if (IS_LEAF(n)) 16598be33e95SAlexander Duyck goto found; 166082cfbb00SStephen Hemminger 166182cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 16628be33e95SAlexander Duyck pn = n; 16638be33e95SAlexander Duyck cindex = 0; 166419baf839SRobert Olsson } 166582cfbb00SStephen Hemminger 16668be33e95SAlexander Duyck *tn = pn; 166782cfbb00SStephen Hemminger return NULL; /* Root of trie */ 16688be33e95SAlexander Duyck found: 16698be33e95SAlexander Duyck /* if we are at the limit for keys just return NULL for the tnode */ 167088bae714SAlexander Duyck *tn = pn; 1671adaf9816SAlexander Duyck return n; 167282cfbb00SStephen Hemminger } 167382cfbb00SStephen Hemminger 16740ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb) 16750ddcf43dSAlexander Duyck { 16760ddcf43dSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 16770ddcf43dSAlexander Duyck struct key_vector *pn = t->kv; 16780ddcf43dSAlexander Duyck unsigned long cindex = 1; 16790ddcf43dSAlexander Duyck struct hlist_node *tmp; 16800ddcf43dSAlexander Duyck struct fib_alias *fa; 16810ddcf43dSAlexander Duyck 16820ddcf43dSAlexander Duyck /* walk trie in reverse order and free everything */ 16830ddcf43dSAlexander Duyck for (;;) { 16840ddcf43dSAlexander Duyck struct key_vector *n; 16850ddcf43dSAlexander Duyck 16860ddcf43dSAlexander Duyck if (!(cindex--)) { 16870ddcf43dSAlexander Duyck t_key pkey = pn->key; 16880ddcf43dSAlexander Duyck 16890ddcf43dSAlexander Duyck if (IS_TRIE(pn)) 16900ddcf43dSAlexander Duyck break; 16910ddcf43dSAlexander Duyck 16920ddcf43dSAlexander Duyck n = pn; 16930ddcf43dSAlexander Duyck pn = node_parent(pn); 16940ddcf43dSAlexander Duyck 16950ddcf43dSAlexander Duyck /* drop emptied tnode */ 16960ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16970ddcf43dSAlexander Duyck node_free(n); 16980ddcf43dSAlexander Duyck 16990ddcf43dSAlexander Duyck cindex = get_index(pkey, pn); 17000ddcf43dSAlexander Duyck 17010ddcf43dSAlexander Duyck continue; 17020ddcf43dSAlexander Duyck } 17030ddcf43dSAlexander Duyck 17040ddcf43dSAlexander Duyck /* grab the next available node */ 17050ddcf43dSAlexander Duyck n = get_child(pn, cindex); 17060ddcf43dSAlexander Duyck if (!n) 17070ddcf43dSAlexander Duyck continue; 17080ddcf43dSAlexander Duyck 17090ddcf43dSAlexander Duyck if (IS_TNODE(n)) { 17100ddcf43dSAlexander Duyck /* record pn and cindex for leaf walking */ 17110ddcf43dSAlexander Duyck pn = n; 17120ddcf43dSAlexander Duyck cindex = 1ul << n->bits; 17130ddcf43dSAlexander Duyck 17140ddcf43dSAlexander Duyck continue; 17150ddcf43dSAlexander Duyck } 17160ddcf43dSAlexander Duyck 17170ddcf43dSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 17180ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 17190ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 17200ddcf43dSAlexander Duyck } 17210ddcf43dSAlexander Duyck 17220ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 17230ddcf43dSAlexander Duyck node_free(n); 17240ddcf43dSAlexander Duyck } 17250ddcf43dSAlexander Duyck 17260ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 17270ddcf43dSAlexander Duyck free_percpu(t->stats); 17280ddcf43dSAlexander Duyck #endif 17290ddcf43dSAlexander Duyck kfree(tb); 17300ddcf43dSAlexander Duyck } 17310ddcf43dSAlexander Duyck 17320ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) 17330ddcf43dSAlexander Duyck { 17340ddcf43dSAlexander Duyck struct trie *ot = (struct trie *)oldtb->tb_data; 17350ddcf43dSAlexander Duyck struct key_vector *l, *tp = ot->kv; 17360ddcf43dSAlexander Duyck struct fib_table *local_tb; 17370ddcf43dSAlexander Duyck struct fib_alias *fa; 17380ddcf43dSAlexander Duyck struct trie *lt; 17390ddcf43dSAlexander Duyck t_key key = 0; 17400ddcf43dSAlexander Duyck 17410ddcf43dSAlexander Duyck if (oldtb->tb_data == oldtb->__data) 17420ddcf43dSAlexander Duyck return oldtb; 17430ddcf43dSAlexander Duyck 17440ddcf43dSAlexander Duyck local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); 17450ddcf43dSAlexander Duyck if (!local_tb) 17460ddcf43dSAlexander Duyck return NULL; 17470ddcf43dSAlexander Duyck 17480ddcf43dSAlexander Duyck lt = (struct trie *)local_tb->tb_data; 17490ddcf43dSAlexander Duyck 17500ddcf43dSAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 17510ddcf43dSAlexander Duyck struct key_vector *local_l = NULL, *local_tp; 17520ddcf43dSAlexander Duyck 17530ddcf43dSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 17540ddcf43dSAlexander Duyck struct fib_alias *new_fa; 17550ddcf43dSAlexander Duyck 17560ddcf43dSAlexander Duyck if (local_tb->tb_id != fa->tb_id) 17570ddcf43dSAlexander Duyck continue; 17580ddcf43dSAlexander Duyck 17590ddcf43dSAlexander Duyck /* clone fa for new local table */ 17600ddcf43dSAlexander Duyck new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 17610ddcf43dSAlexander Duyck if (!new_fa) 17620ddcf43dSAlexander Duyck goto out; 17630ddcf43dSAlexander Duyck 17640ddcf43dSAlexander Duyck memcpy(new_fa, fa, sizeof(*fa)); 17650ddcf43dSAlexander Duyck 17660ddcf43dSAlexander Duyck /* insert clone into table */ 17670ddcf43dSAlexander Duyck if (!local_l) 17680ddcf43dSAlexander Duyck local_l = fib_find_node(lt, &local_tp, l->key); 17690ddcf43dSAlexander Duyck 17700ddcf43dSAlexander Duyck if (fib_insert_alias(lt, local_tp, local_l, new_fa, 17713114cdfeSAlexander Duyck NULL, l->key)) { 17723114cdfeSAlexander Duyck kmem_cache_free(fn_alias_kmem, new_fa); 17730ddcf43dSAlexander Duyck goto out; 17740ddcf43dSAlexander Duyck } 17753114cdfeSAlexander Duyck } 17760ddcf43dSAlexander Duyck 17770ddcf43dSAlexander Duyck /* stop loop if key wrapped back to 0 */ 17780ddcf43dSAlexander Duyck key = l->key + 1; 17790ddcf43dSAlexander Duyck if (key < l->key) 17800ddcf43dSAlexander Duyck break; 17810ddcf43dSAlexander Duyck } 17820ddcf43dSAlexander Duyck 17830ddcf43dSAlexander Duyck return local_tb; 17840ddcf43dSAlexander Duyck out: 17850ddcf43dSAlexander Duyck fib_trie_free(local_tb); 17860ddcf43dSAlexander Duyck 17870ddcf43dSAlexander Duyck return NULL; 17880ddcf43dSAlexander Duyck } 17890ddcf43dSAlexander Duyck 17903b709334SAlexander Duyck /* Caller must hold RTNL */ 17913b709334SAlexander Duyck void fib_table_flush_external(struct fib_table *tb) 17923b709334SAlexander Duyck { 17933b709334SAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 17943b709334SAlexander Duyck struct key_vector *pn = t->kv; 17953b709334SAlexander Duyck unsigned long cindex = 1; 17963b709334SAlexander Duyck struct hlist_node *tmp; 17973b709334SAlexander Duyck struct fib_alias *fa; 17983b709334SAlexander Duyck 17993b709334SAlexander Duyck /* walk trie in reverse order */ 18003b709334SAlexander Duyck for (;;) { 18013b709334SAlexander Duyck unsigned char slen = 0; 18023b709334SAlexander Duyck struct key_vector *n; 18033b709334SAlexander Duyck 18043b709334SAlexander Duyck if (!(cindex--)) { 18053b709334SAlexander Duyck t_key pkey = pn->key; 18063b709334SAlexander Duyck 18073b709334SAlexander Duyck /* cannot resize the trie vector */ 18083b709334SAlexander Duyck if (IS_TRIE(pn)) 18093b709334SAlexander Duyck break; 18103b709334SAlexander Duyck 1811a52ca62cSAlexander Duyck /* update the suffix to address pulled leaves */ 1812a52ca62cSAlexander Duyck if (pn->slen > pn->pos) 1813a52ca62cSAlexander Duyck update_suffix(pn); 1814a52ca62cSAlexander Duyck 18153b709334SAlexander Duyck /* resize completed node */ 18163b709334SAlexander Duyck pn = resize(t, pn); 18173b709334SAlexander Duyck cindex = get_index(pkey, pn); 18183b709334SAlexander Duyck 18193b709334SAlexander Duyck continue; 18203b709334SAlexander Duyck } 18213b709334SAlexander Duyck 18223b709334SAlexander Duyck /* grab the next available node */ 18233b709334SAlexander Duyck n = get_child(pn, cindex); 18243b709334SAlexander Duyck if (!n) 18253b709334SAlexander Duyck continue; 18263b709334SAlexander Duyck 18273b709334SAlexander Duyck if (IS_TNODE(n)) { 18283b709334SAlexander Duyck /* record pn and cindex for leaf walking */ 18293b709334SAlexander Duyck pn = n; 18303b709334SAlexander Duyck cindex = 1ul << n->bits; 18313b709334SAlexander Duyck 18323b709334SAlexander Duyck continue; 18333b709334SAlexander Duyck } 18343b709334SAlexander Duyck 18353b709334SAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 18363b709334SAlexander Duyck /* if alias was cloned to local then we just 18373b709334SAlexander Duyck * need to remove the local copy from main 18383b709334SAlexander Duyck */ 18393b709334SAlexander Duyck if (tb->tb_id != fa->tb_id) { 18403b709334SAlexander Duyck hlist_del_rcu(&fa->fa_list); 18413b709334SAlexander Duyck alias_free_mem_rcu(fa); 18423b709334SAlexander Duyck continue; 18433b709334SAlexander Duyck } 18443b709334SAlexander Duyck 18453b709334SAlexander Duyck /* record local slen */ 18463b709334SAlexander Duyck slen = fa->fa_slen; 18473b709334SAlexander Duyck } 18483b709334SAlexander Duyck 18493b709334SAlexander Duyck /* update leaf slen */ 18503b709334SAlexander Duyck n->slen = slen; 18513b709334SAlexander Duyck 18523b709334SAlexander Duyck if (hlist_empty(&n->leaf)) { 18533b709334SAlexander Duyck put_child_root(pn, n->key, NULL); 18543b709334SAlexander Duyck node_free(n); 18553b709334SAlexander Duyck } 18563b709334SAlexander Duyck } 18573b709334SAlexander Duyck } 18583b709334SAlexander Duyck 18598be33e95SAlexander Duyck /* Caller must hold RTNL. */ 1860f97f4dd8SIdo Schimmel int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) 186119baf839SRobert Olsson { 186219baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 186388bae714SAlexander Duyck struct key_vector *pn = t->kv; 186488bae714SAlexander Duyck unsigned long cindex = 1; 18657289e6ddSAlexander Duyck struct hlist_node *tmp; 18667289e6ddSAlexander Duyck struct fib_alias *fa; 186782cfbb00SStephen Hemminger int found = 0; 186819baf839SRobert Olsson 18697289e6ddSAlexander Duyck /* walk trie in reverse order */ 187088bae714SAlexander Duyck for (;;) { 187188bae714SAlexander Duyck unsigned char slen = 0; 187288bae714SAlexander Duyck struct key_vector *n; 187388bae714SAlexander Duyck 187488bae714SAlexander Duyck if (!(cindex--)) { 18757289e6ddSAlexander Duyck t_key pkey = pn->key; 18767289e6ddSAlexander Duyck 187788bae714SAlexander Duyck /* cannot resize the trie vector */ 187888bae714SAlexander Duyck if (IS_TRIE(pn)) 187988bae714SAlexander Duyck break; 18807289e6ddSAlexander Duyck 1881a52ca62cSAlexander Duyck /* update the suffix to address pulled leaves */ 1882a52ca62cSAlexander Duyck if (pn->slen > pn->pos) 1883a52ca62cSAlexander Duyck update_suffix(pn); 1884a52ca62cSAlexander Duyck 18857289e6ddSAlexander Duyck /* resize completed node */ 188688bae714SAlexander Duyck pn = resize(t, pn); 18877289e6ddSAlexander Duyck cindex = get_index(pkey, pn); 188888bae714SAlexander Duyck 188988bae714SAlexander Duyck continue; 189064c62723SAlexander Duyck } 189164c62723SAlexander Duyck 18927289e6ddSAlexander Duyck /* grab the next available node */ 1893754baf8dSAlexander Duyck n = get_child(pn, cindex); 189488bae714SAlexander Duyck if (!n) 189588bae714SAlexander Duyck continue; 189619baf839SRobert Olsson 189788bae714SAlexander Duyck if (IS_TNODE(n)) { 189888bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 189988bae714SAlexander Duyck pn = n; 190088bae714SAlexander Duyck cindex = 1ul << n->bits; 190188bae714SAlexander Duyck 190288bae714SAlexander Duyck continue; 190388bae714SAlexander Duyck } 19047289e6ddSAlexander Duyck 19057289e6ddSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 19067289e6ddSAlexander Duyck struct fib_info *fi = fa->fa_info; 19077289e6ddSAlexander Duyck 1908f97f4dd8SIdo Schimmel if (!fi || tb->tb_id != fa->tb_id || 1909f97f4dd8SIdo Schimmel (!(fi->fib_flags & RTNH_F_DEAD) && 1910f97f4dd8SIdo Schimmel !fib_props[fa->fa_type].error)) { 1911f97f4dd8SIdo Schimmel slen = fa->fa_slen; 1912f97f4dd8SIdo Schimmel continue; 1913f97f4dd8SIdo Schimmel } 1914f97f4dd8SIdo Schimmel 1915f97f4dd8SIdo Schimmel /* Do not flush error routes if network namespace is 1916f97f4dd8SIdo Schimmel * not being dismantled 1917f97f4dd8SIdo Schimmel */ 1918f97f4dd8SIdo Schimmel if (!flush_all && fib_props[fa->fa_type].error) { 191988bae714SAlexander Duyck slen = fa->fa_slen; 192088bae714SAlexander Duyck continue; 192188bae714SAlexander Duyck } 192288bae714SAlexander Duyck 1923b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1924b90eb754SJiri Pirko n->key, 19256c31e5a9SDavid Ahern KEYLENGTH - fa->fa_slen, fa, 19266c31e5a9SDavid Ahern NULL); 19277289e6ddSAlexander Duyck hlist_del_rcu(&fa->fa_list); 19287289e6ddSAlexander Duyck fib_release_info(fa->fa_info); 19297289e6ddSAlexander Duyck alias_free_mem_rcu(fa); 19307289e6ddSAlexander Duyck found++; 19317289e6ddSAlexander Duyck } 19327289e6ddSAlexander Duyck 19337289e6ddSAlexander Duyck /* update leaf slen */ 19347289e6ddSAlexander Duyck n->slen = slen; 19357289e6ddSAlexander Duyck 19367289e6ddSAlexander Duyck if (hlist_empty(&n->leaf)) { 193788bae714SAlexander Duyck put_child_root(pn, n->key, NULL); 19387289e6ddSAlexander Duyck node_free(n); 19397289e6ddSAlexander Duyck } 194088bae714SAlexander Duyck } 19417289e6ddSAlexander Duyck 19420c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 194319baf839SRobert Olsson return found; 194419baf839SRobert Olsson } 194519baf839SRobert Olsson 19461bff1a0cSDavid Ahern /* derived from fib_trie_free */ 19471bff1a0cSDavid Ahern static void __fib_info_notify_update(struct net *net, struct fib_table *tb, 19481bff1a0cSDavid Ahern struct nl_info *info) 19491bff1a0cSDavid Ahern { 19501bff1a0cSDavid Ahern struct trie *t = (struct trie *)tb->tb_data; 19511bff1a0cSDavid Ahern struct key_vector *pn = t->kv; 19521bff1a0cSDavid Ahern unsigned long cindex = 1; 19531bff1a0cSDavid Ahern struct fib_alias *fa; 19541bff1a0cSDavid Ahern 19551bff1a0cSDavid Ahern for (;;) { 19561bff1a0cSDavid Ahern struct key_vector *n; 19571bff1a0cSDavid Ahern 19581bff1a0cSDavid Ahern if (!(cindex--)) { 19591bff1a0cSDavid Ahern t_key pkey = pn->key; 19601bff1a0cSDavid Ahern 19611bff1a0cSDavid Ahern if (IS_TRIE(pn)) 19621bff1a0cSDavid Ahern break; 19631bff1a0cSDavid Ahern 19641bff1a0cSDavid Ahern pn = node_parent(pn); 19651bff1a0cSDavid Ahern cindex = get_index(pkey, pn); 19661bff1a0cSDavid Ahern continue; 19671bff1a0cSDavid Ahern } 19681bff1a0cSDavid Ahern 19691bff1a0cSDavid Ahern /* grab the next available node */ 19701bff1a0cSDavid Ahern n = get_child(pn, cindex); 19711bff1a0cSDavid Ahern if (!n) 19721bff1a0cSDavid Ahern continue; 19731bff1a0cSDavid Ahern 19741bff1a0cSDavid Ahern if (IS_TNODE(n)) { 19751bff1a0cSDavid Ahern /* record pn and cindex for leaf walking */ 19761bff1a0cSDavid Ahern pn = n; 19771bff1a0cSDavid Ahern cindex = 1ul << n->bits; 19781bff1a0cSDavid Ahern 19791bff1a0cSDavid Ahern continue; 19801bff1a0cSDavid Ahern } 19811bff1a0cSDavid Ahern 19821bff1a0cSDavid Ahern hlist_for_each_entry(fa, &n->leaf, fa_list) { 19831bff1a0cSDavid Ahern struct fib_info *fi = fa->fa_info; 19841bff1a0cSDavid Ahern 19851bff1a0cSDavid Ahern if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id) 19861bff1a0cSDavid Ahern continue; 19871bff1a0cSDavid Ahern 19881bff1a0cSDavid Ahern rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, 19891bff1a0cSDavid Ahern KEYLENGTH - fa->fa_slen, tb->tb_id, 19901bff1a0cSDavid Ahern info, NLM_F_REPLACE); 19911bff1a0cSDavid Ahern 19921bff1a0cSDavid Ahern /* call_fib_entry_notifiers will be removed when 19931bff1a0cSDavid Ahern * in-kernel notifier is implemented and supported 19941bff1a0cSDavid Ahern * for nexthop objects 19951bff1a0cSDavid Ahern */ 19961bff1a0cSDavid Ahern call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 19971bff1a0cSDavid Ahern n->key, 19981bff1a0cSDavid Ahern KEYLENGTH - fa->fa_slen, fa, 19991bff1a0cSDavid Ahern NULL); 20001bff1a0cSDavid Ahern } 20011bff1a0cSDavid Ahern } 20021bff1a0cSDavid Ahern } 20031bff1a0cSDavid Ahern 20041bff1a0cSDavid Ahern void fib_info_notify_update(struct net *net, struct nl_info *info) 20051bff1a0cSDavid Ahern { 20061bff1a0cSDavid Ahern unsigned int h; 20071bff1a0cSDavid Ahern 20081bff1a0cSDavid Ahern for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 20091bff1a0cSDavid Ahern struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 20101bff1a0cSDavid Ahern struct fib_table *tb; 20111bff1a0cSDavid Ahern 20121bff1a0cSDavid Ahern hlist_for_each_entry_rcu(tb, head, tb_hlist) 20131bff1a0cSDavid Ahern __fib_info_notify_update(net, tb, info); 20141bff1a0cSDavid Ahern } 20151bff1a0cSDavid Ahern } 20161bff1a0cSDavid Ahern 2017c3852ef7SIdo Schimmel static void fib_leaf_notify(struct net *net, struct key_vector *l, 2018d05f7a7dSIdo Schimmel struct fib_table *tb, struct notifier_block *nb) 2019c3852ef7SIdo Schimmel { 2020c3852ef7SIdo Schimmel struct fib_alias *fa; 2021c3852ef7SIdo Schimmel 2022c3852ef7SIdo Schimmel hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 2023c3852ef7SIdo Schimmel struct fib_info *fi = fa->fa_info; 2024c3852ef7SIdo Schimmel 2025c3852ef7SIdo Schimmel if (!fi) 2026c3852ef7SIdo Schimmel continue; 2027c3852ef7SIdo Schimmel 2028c3852ef7SIdo Schimmel /* local and main table can share the same trie, 2029c3852ef7SIdo Schimmel * so don't notify twice for the same entry. 2030c3852ef7SIdo Schimmel */ 2031c3852ef7SIdo Schimmel if (tb->tb_id != fa->tb_id) 2032c3852ef7SIdo Schimmel continue; 2033c3852ef7SIdo Schimmel 2034d05f7a7dSIdo Schimmel call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, 20356eba87c7SDavid Ahern KEYLENGTH - fa->fa_slen, fa); 2036c3852ef7SIdo Schimmel } 2037c3852ef7SIdo Schimmel } 2038c3852ef7SIdo Schimmel 2039c3852ef7SIdo Schimmel static void fib_table_notify(struct net *net, struct fib_table *tb, 2040d05f7a7dSIdo Schimmel struct notifier_block *nb) 2041c3852ef7SIdo Schimmel { 2042c3852ef7SIdo Schimmel struct trie *t = (struct trie *)tb->tb_data; 2043c3852ef7SIdo Schimmel struct key_vector *l, *tp = t->kv; 2044c3852ef7SIdo Schimmel t_key key = 0; 2045c3852ef7SIdo Schimmel 2046c3852ef7SIdo Schimmel while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 2047d05f7a7dSIdo Schimmel fib_leaf_notify(net, l, tb, nb); 2048c3852ef7SIdo Schimmel 2049c3852ef7SIdo Schimmel key = l->key + 1; 2050c3852ef7SIdo Schimmel /* stop in case of wrap around */ 2051c3852ef7SIdo Schimmel if (key < l->key) 2052c3852ef7SIdo Schimmel break; 2053c3852ef7SIdo Schimmel } 2054c3852ef7SIdo Schimmel } 2055c3852ef7SIdo Schimmel 2056d05f7a7dSIdo Schimmel void fib_notify(struct net *net, struct notifier_block *nb) 2057c3852ef7SIdo Schimmel { 2058c3852ef7SIdo Schimmel unsigned int h; 2059c3852ef7SIdo Schimmel 2060c3852ef7SIdo Schimmel for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 2061c3852ef7SIdo Schimmel struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2062c3852ef7SIdo Schimmel struct fib_table *tb; 2063c3852ef7SIdo Schimmel 2064c3852ef7SIdo Schimmel hlist_for_each_entry_rcu(tb, head, tb_hlist) 2065d05f7a7dSIdo Schimmel fib_table_notify(net, tb, nb); 2066c3852ef7SIdo Schimmel } 2067c3852ef7SIdo Schimmel } 2068c3852ef7SIdo Schimmel 2069a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head) 20704aa2c466SPavel Emelyanov { 2071a7e53531SAlexander Duyck struct fib_table *tb = container_of(head, struct fib_table, rcu); 20728274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 20738274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 20748274a97aSAlexander Duyck 20750ddcf43dSAlexander Duyck if (tb->tb_data == tb->__data) 20768274a97aSAlexander Duyck free_percpu(t->stats); 20778274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 20784aa2c466SPavel Emelyanov kfree(tb); 20794aa2c466SPavel Emelyanov } 20804aa2c466SPavel Emelyanov 2081a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb) 2082a7e53531SAlexander Duyck { 2083a7e53531SAlexander Duyck call_rcu(&tb->rcu, __trie_free_rcu); 2084a7e53531SAlexander Duyck } 2085a7e53531SAlexander Duyck 208635c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 208718a8021aSDavid Ahern struct sk_buff *skb, struct netlink_callback *cb, 208818a8021aSDavid Ahern struct fib_dump_filter *filter) 208919baf839SRobert Olsson { 209018a8021aSDavid Ahern unsigned int flags = NLM_F_MULTI; 209179e5ad2cSAlexander Duyck __be32 xkey = htonl(l->key); 209219baf839SRobert Olsson struct fib_alias *fa; 209379e5ad2cSAlexander Duyck int i, s_i; 209419baf839SRobert Olsson 209518a8021aSDavid Ahern if (filter->filter_set) 209618a8021aSDavid Ahern flags |= NLM_F_DUMP_FILTERED; 209718a8021aSDavid Ahern 209879e5ad2cSAlexander Duyck s_i = cb->args[4]; 209919baf839SRobert Olsson i = 0; 210019baf839SRobert Olsson 21012373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 210279e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 2103f6c5775fSDavid Ahern int err; 2104f6c5775fSDavid Ahern 210518a8021aSDavid Ahern if (i < s_i) 210618a8021aSDavid Ahern goto next; 210719baf839SRobert Olsson 210818a8021aSDavid Ahern if (tb->tb_id != fa->tb_id) 210918a8021aSDavid Ahern goto next; 211018a8021aSDavid Ahern 211118a8021aSDavid Ahern if (filter->filter_set) { 211218a8021aSDavid Ahern if (filter->rt_type && fa->fa_type != filter->rt_type) 211318a8021aSDavid Ahern goto next; 211418a8021aSDavid Ahern 211518a8021aSDavid Ahern if ((filter->protocol && 211618a8021aSDavid Ahern fa->fa_info->fib_protocol != filter->protocol)) 211718a8021aSDavid Ahern goto next; 211818a8021aSDavid Ahern 211918a8021aSDavid Ahern if (filter->dev && 212018a8021aSDavid Ahern !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) 212118a8021aSDavid Ahern goto next; 21220ddcf43dSAlexander Duyck } 21230ddcf43dSAlexander Duyck 2124f6c5775fSDavid Ahern err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 2125f6c5775fSDavid Ahern cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2126f6c5775fSDavid Ahern tb->tb_id, fa->fa_type, 2127f6c5775fSDavid Ahern xkey, KEYLENGTH - fa->fa_slen, 212818a8021aSDavid Ahern fa->fa_tos, fa->fa_info, flags); 2129f6c5775fSDavid Ahern if (err < 0) { 213071d67e66SStephen Hemminger cb->args[4] = i; 2131f6c5775fSDavid Ahern return err; 213219baf839SRobert Olsson } 213318a8021aSDavid Ahern next: 2134a88ee229SStephen Hemminger i++; 213519baf839SRobert Olsson } 2136a88ee229SStephen Hemminger 213771d67e66SStephen Hemminger cb->args[4] = i; 213819baf839SRobert Olsson return skb->len; 213919baf839SRobert Olsson } 214019baf839SRobert Olsson 2141a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */ 214216c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 214318a8021aSDavid Ahern struct netlink_callback *cb, struct fib_dump_filter *filter) 214419baf839SRobert Olsson { 214519baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 214688bae714SAlexander Duyck struct key_vector *l, *tp = t->kv; 2147d5ce8a0eSStephen Hemminger /* Dump starting at last key. 2148d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 2149d5ce8a0eSStephen Hemminger */ 21508be33e95SAlexander Duyck int count = cb->args[2]; 21518be33e95SAlexander Duyck t_key key = cb->args[3]; 2152a88ee229SStephen Hemminger 21538be33e95SAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 2154f6c5775fSDavid Ahern int err; 2155f6c5775fSDavid Ahern 215618a8021aSDavid Ahern err = fn_trie_dump_leaf(l, tb, skb, cb, filter); 2157f6c5775fSDavid Ahern if (err < 0) { 21588be33e95SAlexander Duyck cb->args[3] = key; 21598be33e95SAlexander Duyck cb->args[2] = count; 2160f6c5775fSDavid Ahern return err; 216119baf839SRobert Olsson } 2162d5ce8a0eSStephen Hemminger 216371d67e66SStephen Hemminger ++count; 21648be33e95SAlexander Duyck key = l->key + 1; 21658be33e95SAlexander Duyck 216671d67e66SStephen Hemminger memset(&cb->args[4], 0, 216771d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 21688be33e95SAlexander Duyck 21698be33e95SAlexander Duyck /* stop loop if key wrapped back to 0 */ 21708be33e95SAlexander Duyck if (key < l->key) 21718be33e95SAlexander Duyck break; 2172a88ee229SStephen Hemminger } 21738be33e95SAlexander Duyck 21748be33e95SAlexander Duyck cb->args[3] = key; 21758be33e95SAlexander Duyck cb->args[2] = count; 21768be33e95SAlexander Duyck 2177a88ee229SStephen Hemminger return skb->len; 2178a88ee229SStephen Hemminger } 217919baf839SRobert Olsson 21805348ba85SDavid S. Miller void __init fib_trie_init(void) 21817f9b8052SStephen Hemminger { 2182a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 2183a07f5f50SStephen Hemminger sizeof(struct fib_alias), 2184bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 2185bc3c8c1eSStephen Hemminger 2186bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 218741b489fdSAlexander Duyck LEAF_SIZE, 2188bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 21897f9b8052SStephen Hemminger } 219019baf839SRobert Olsson 21910ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) 219219baf839SRobert Olsson { 219319baf839SRobert Olsson struct fib_table *tb; 219419baf839SRobert Olsson struct trie *t; 21950ddcf43dSAlexander Duyck size_t sz = sizeof(*tb); 219619baf839SRobert Olsson 21970ddcf43dSAlexander Duyck if (!alias) 21980ddcf43dSAlexander Duyck sz += sizeof(struct trie); 21990ddcf43dSAlexander Duyck 22000ddcf43dSAlexander Duyck tb = kzalloc(sz, GFP_KERNEL); 220151456b29SIan Morris if (!tb) 220219baf839SRobert Olsson return NULL; 220319baf839SRobert Olsson 220419baf839SRobert Olsson tb->tb_id = id; 220521d8c49eSDavid S. Miller tb->tb_num_default = 0; 22060ddcf43dSAlexander Duyck tb->tb_data = (alias ? alias->__data : tb->__data); 22070ddcf43dSAlexander Duyck 22080ddcf43dSAlexander Duyck if (alias) 22090ddcf43dSAlexander Duyck return tb; 221019baf839SRobert Olsson 221119baf839SRobert Olsson t = (struct trie *) tb->tb_data; 221288bae714SAlexander Duyck t->kv[0].pos = KEYLENGTH; 221388bae714SAlexander Duyck t->kv[0].slen = KEYLENGTH; 22148274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 22158274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 22168274a97aSAlexander Duyck if (!t->stats) { 22178274a97aSAlexander Duyck kfree(tb); 22188274a97aSAlexander Duyck tb = NULL; 22198274a97aSAlexander Duyck } 22208274a97aSAlexander Duyck #endif 222119baf839SRobert Olsson 222219baf839SRobert Olsson return tb; 222319baf839SRobert Olsson } 222419baf839SRobert Olsson 2225cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2226cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2227cb7b593cSStephen Hemminger struct fib_trie_iter { 22281c340b2fSDenis V. Lunev struct seq_net_private p; 22293d3b2d25SStephen Hemminger struct fib_table *tb; 223035c6edacSAlexander Duyck struct key_vector *tnode; 2231a034ee3cSEric Dumazet unsigned int index; 2232a034ee3cSEric Dumazet unsigned int depth; 2233cb7b593cSStephen Hemminger }; 223419baf839SRobert Olsson 223535c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) 223619baf839SRobert Olsson { 223798293e8dSAlexander Duyck unsigned long cindex = iter->index; 223888bae714SAlexander Duyck struct key_vector *pn = iter->tnode; 223988bae714SAlexander Duyck t_key pkey; 22406640e697SEric W. Biederman 2241cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2242cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 224319baf839SRobert Olsson 224488bae714SAlexander Duyck while (!IS_TRIE(pn)) { 224588bae714SAlexander Duyck while (cindex < child_length(pn)) { 224688bae714SAlexander Duyck struct key_vector *n = get_child_rcu(pn, cindex++); 224788bae714SAlexander Duyck 224888bae714SAlexander Duyck if (!n) 224988bae714SAlexander Duyck continue; 225088bae714SAlexander Duyck 225119baf839SRobert Olsson if (IS_LEAF(n)) { 225288bae714SAlexander Duyck iter->tnode = pn; 225388bae714SAlexander Duyck iter->index = cindex; 225491b9a277SOlof Johansson } else { 2255cb7b593cSStephen Hemminger /* push down one level */ 2256adaf9816SAlexander Duyck iter->tnode = n; 2257cb7b593cSStephen Hemminger iter->index = 0; 2258cb7b593cSStephen Hemminger ++iter->depth; 225919baf839SRobert Olsson } 226088bae714SAlexander Duyck 2261cb7b593cSStephen Hemminger return n; 226219baf839SRobert Olsson } 226319baf839SRobert Olsson 2264cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 226588bae714SAlexander Duyck pkey = pn->key; 226688bae714SAlexander Duyck pn = node_parent_rcu(pn); 226788bae714SAlexander Duyck cindex = get_index(pkey, pn) + 1; 2268cb7b593cSStephen Hemminger --iter->depth; 2269cb7b593cSStephen Hemminger } 2270cb7b593cSStephen Hemminger 227188bae714SAlexander Duyck /* record root node so further searches know we are done */ 227288bae714SAlexander Duyck iter->tnode = pn; 227388bae714SAlexander Duyck iter->index = 0; 227488bae714SAlexander Duyck 2275cb7b593cSStephen Hemminger return NULL; 2276cb7b593cSStephen Hemminger } 2277cb7b593cSStephen Hemminger 227835c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2279cb7b593cSStephen Hemminger struct trie *t) 2280cb7b593cSStephen Hemminger { 2281f38b24c9SFiro Yang struct key_vector *n, *pn; 22825ddf0eb2SRobert Olsson 22835ddf0eb2SRobert Olsson if (!t) 22845ddf0eb2SRobert Olsson return NULL; 22855ddf0eb2SRobert Olsson 2286f38b24c9SFiro Yang pn = t->kv; 228788bae714SAlexander Duyck n = rcu_dereference(pn->tnode[0]); 22883d3b2d25SStephen Hemminger if (!n) 22895ddf0eb2SRobert Olsson return NULL; 2290cb7b593cSStephen Hemminger 22916640e697SEric W. Biederman if (IS_TNODE(n)) { 2292adaf9816SAlexander Duyck iter->tnode = n; 2293cb7b593cSStephen Hemminger iter->index = 0; 22941d25cd6cSRobert Olsson iter->depth = 1; 22956640e697SEric W. Biederman } else { 229688bae714SAlexander Duyck iter->tnode = pn; 22976640e697SEric W. Biederman iter->index = 0; 22986640e697SEric W. Biederman iter->depth = 0; 22996640e697SEric W. Biederman } 23003d3b2d25SStephen Hemminger 2301cb7b593cSStephen Hemminger return n; 2302cb7b593cSStephen Hemminger } 2303cb7b593cSStephen Hemminger 2304cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 230519baf839SRobert Olsson { 230635c6edacSAlexander Duyck struct key_vector *n; 2307cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2308cb7b593cSStephen Hemminger 2309cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 231019baf839SRobert Olsson 23112373ce1cSRobert Olsson rcu_read_lock(); 23123d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2313cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 231479e5ad2cSAlexander Duyck struct fib_alias *fa; 231593672292SStephen Hemminger 231619baf839SRobert Olsson s->leaves++; 2317cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2318cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2319cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 232093672292SStephen Hemminger 232179e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) 232293672292SStephen Hemminger ++s->prefixes; 232391b9a277SOlof Johansson } else { 232419baf839SRobert Olsson s->tnodes++; 2325adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 2326adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 23276e22d174SAlexander Duyck s->nullpointers += tn_info(n)->empty_children; 232819baf839SRobert Olsson } 232998293e8dSAlexander Duyck } 23302373ce1cSRobert Olsson rcu_read_unlock(); 233119baf839SRobert Olsson } 233219baf839SRobert Olsson 233319baf839SRobert Olsson /* 233419baf839SRobert Olsson * This outputs /proc/net/fib_triestats 233519baf839SRobert Olsson */ 2336cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 233719baf839SRobert Olsson { 2338a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 233919baf839SRobert Olsson 234019baf839SRobert Olsson if (stat->leaves) 234119baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 234219baf839SRobert Olsson else 234319baf839SRobert Olsson avdepth = 0; 234419baf839SRobert Olsson 2345a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2346a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2347cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2348cb7b593cSStephen Hemminger 2349cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 235041b489fdSAlexander Duyck bytes = LEAF_SIZE * stat->leaves; 235193672292SStephen Hemminger 235293672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 235379e5ad2cSAlexander Duyck bytes += sizeof(struct fib_alias) * stat->prefixes; 235493672292SStephen Hemminger 2355187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 235641b489fdSAlexander Duyck bytes += TNODE_SIZE(0) * stat->tnodes; 235719baf839SRobert Olsson 235806ef921dSRobert Olsson max = MAX_STAT_DEPTH; 235906ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 236019baf839SRobert Olsson max--; 236119baf839SRobert Olsson 2362cb7b593cSStephen Hemminger pointers = 0; 2363f585a991SJerry Snitselaar for (i = 1; i < max; i++) 236419baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2365187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 236619baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 236719baf839SRobert Olsson } 2368cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2369187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2370cb7b593cSStephen Hemminger 237135c6edacSAlexander Duyck bytes += sizeof(struct key_vector *) * pointers; 2372187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2373187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 237466a2f7fdSStephen Hemminger } 237519baf839SRobert Olsson 237619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 237766a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 23788274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 237966a2f7fdSStephen Hemminger { 23808274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 23818274a97aSAlexander Duyck int cpu; 23828274a97aSAlexander Duyck 23838274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 23848274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 23858274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 23868274a97aSAlexander Duyck 23878274a97aSAlexander Duyck s.gets += pcpu->gets; 23888274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 23898274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 23908274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 23918274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 23928274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 23938274a97aSAlexander Duyck } 23948274a97aSAlexander Duyck 239566a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 23968274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 23978274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2398a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 23998274a97aSAlexander Duyck s.semantic_match_passed); 24008274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 24018274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 24028274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 240319baf839SRobert Olsson } 240466a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 240566a2f7fdSStephen Hemminger 24063d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2407d717a9a6SStephen Hemminger { 24083d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 24093d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 24103d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 24113d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 24123d3b2d25SStephen Hemminger else 24133d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2414d717a9a6SStephen Hemminger } 241519baf839SRobert Olsson 24163d3b2d25SStephen Hemminger 241719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 241819baf839SRobert Olsson { 24191c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 24203d3b2d25SStephen Hemminger unsigned int h; 2421877a9bffSEric W. Biederman 2422d717a9a6SStephen Hemminger seq_printf(seq, 2423a07f5f50SStephen Hemminger "Basic info: size of leaf:" 24245b5e0928SAlexey Dobriyan " %zd bytes, size of tnode: %zd bytes.\n", 242541b489fdSAlexander Duyck LEAF_SIZE, TNODE_SIZE(0)); 242619baf839SRobert Olsson 24273d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 24283d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 24293d3b2d25SStephen Hemminger struct fib_table *tb; 2430cb7b593cSStephen Hemminger 2431b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 24323d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 24333d3b2d25SStephen Hemminger struct trie_stat stat; 24343d3b2d25SStephen Hemminger 24353d3b2d25SStephen Hemminger if (!t) 24363d3b2d25SStephen Hemminger continue; 24373d3b2d25SStephen Hemminger 24383d3b2d25SStephen Hemminger fib_table_print(seq, tb); 24393d3b2d25SStephen Hemminger 24403d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 24413d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 24423d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 24438274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 24443d3b2d25SStephen Hemminger #endif 24453d3b2d25SStephen Hemminger } 24463d3b2d25SStephen Hemminger } 2447cb7b593cSStephen Hemminger 244819baf839SRobert Olsson return 0; 244919baf839SRobert Olsson } 245019baf839SRobert Olsson 245135c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 245219baf839SRobert Olsson { 24531218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 24541218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2455cb7b593cSStephen Hemminger loff_t idx = 0; 24563d3b2d25SStephen Hemminger unsigned int h; 24573d3b2d25SStephen Hemminger 24583d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 24593d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 24603d3b2d25SStephen Hemminger struct fib_table *tb; 24613d3b2d25SStephen Hemminger 2462b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 246335c6edacSAlexander Duyck struct key_vector *n; 2464cb7b593cSStephen Hemminger 24653d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 24663d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 24673d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 24683d3b2d25SStephen Hemminger if (pos == idx++) { 24693d3b2d25SStephen Hemminger iter->tb = tb; 2470cb7b593cSStephen Hemminger return n; 247119baf839SRobert Olsson } 24723d3b2d25SStephen Hemminger } 24733d3b2d25SStephen Hemminger } 247419baf839SRobert Olsson 247519baf839SRobert Olsson return NULL; 247619baf839SRobert Olsson } 247719baf839SRobert Olsson 247819baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2479c95aaf9aSStephen Hemminger __acquires(RCU) 248019baf839SRobert Olsson { 2481cb7b593cSStephen Hemminger rcu_read_lock(); 24821218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 248319baf839SRobert Olsson } 248419baf839SRobert Olsson 248519baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 248619baf839SRobert Olsson { 2487cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 24881218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 24893d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 24903d3b2d25SStephen Hemminger struct hlist_node *tb_node; 24913d3b2d25SStephen Hemminger unsigned int h; 249235c6edacSAlexander Duyck struct key_vector *n; 2493cb7b593cSStephen Hemminger 249419baf839SRobert Olsson ++*pos; 24953d3b2d25SStephen Hemminger /* next node in same table */ 24963d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 24973d3b2d25SStephen Hemminger if (n) 24983d3b2d25SStephen Hemminger return n; 249991b9a277SOlof Johansson 25003d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 25013d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 25020a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 25033d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 25043d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 25053d3b2d25SStephen Hemminger if (n) 25063d3b2d25SStephen Hemminger goto found; 25073d3b2d25SStephen Hemminger } 2508cb7b593cSStephen Hemminger 25093d3b2d25SStephen Hemminger /* new hash chain */ 25103d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 25113d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2512b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 25133d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 25143d3b2d25SStephen Hemminger if (n) 25153d3b2d25SStephen Hemminger goto found; 25163d3b2d25SStephen Hemminger } 25173d3b2d25SStephen Hemminger } 2518cb7b593cSStephen Hemminger return NULL; 25193d3b2d25SStephen Hemminger 25203d3b2d25SStephen Hemminger found: 25213d3b2d25SStephen Hemminger iter->tb = tb; 25223d3b2d25SStephen Hemminger return n; 252319baf839SRobert Olsson } 252419baf839SRobert Olsson 252519baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2526c95aaf9aSStephen Hemminger __releases(RCU) 252719baf839SRobert Olsson { 2528cb7b593cSStephen Hemminger rcu_read_unlock(); 252919baf839SRobert Olsson } 253019baf839SRobert Olsson 2531cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2532cb7b593cSStephen Hemminger { 2533a034ee3cSEric Dumazet while (n-- > 0) 2534a034ee3cSEric Dumazet seq_puts(seq, " "); 2535cb7b593cSStephen Hemminger } 253619baf839SRobert Olsson 253728d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2538cb7b593cSStephen Hemminger { 2539cb7b593cSStephen Hemminger switch (s) { 2540cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2541cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2542cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2543cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2544cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2545cb7b593cSStephen Hemminger default: 254628d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2547cb7b593cSStephen Hemminger return buf; 2548cb7b593cSStephen Hemminger } 2549cb7b593cSStephen Hemminger } 2550cb7b593cSStephen Hemminger 255136cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2552cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2553cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2554cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2555cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2556cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2557cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2558cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2559cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2560cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2561cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2562cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2563cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2564cb7b593cSStephen Hemminger }; 2565cb7b593cSStephen Hemminger 2566a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2567cb7b593cSStephen Hemminger { 2568cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2569cb7b593cSStephen Hemminger return rtn_type_names[t]; 257028d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2571cb7b593cSStephen Hemminger return buf; 2572cb7b593cSStephen Hemminger } 2573cb7b593cSStephen Hemminger 2574cb7b593cSStephen Hemminger /* Pretty print the trie */ 257519baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 257619baf839SRobert Olsson { 2577cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 257835c6edacSAlexander Duyck struct key_vector *n = v; 257919baf839SRobert Olsson 258088bae714SAlexander Duyck if (IS_TRIE(node_parent_rcu(n))) 25813d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2582095b8501SRobert Olsson 2583095b8501SRobert Olsson if (IS_TNODE(n)) { 2584adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2585095b8501SRobert Olsson 25861d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2587e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2588e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 25896e22d174SAlexander Duyck tn_info(n)->full_children, 25906e22d174SAlexander Duyck tn_info(n)->empty_children); 2591cb7b593cSStephen Hemminger } else { 2592adaf9816SAlexander Duyck __be32 val = htonl(n->key); 259379e5ad2cSAlexander Duyck struct fib_alias *fa; 2594cb7b593cSStephen Hemminger 2595cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2596673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 259728d36e37SEric Dumazet 259879e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 259928d36e37SEric Dumazet char buf1[32], buf2[32]; 260028d36e37SEric Dumazet 2601cb7b593cSStephen Hemminger seq_indent(seq, iter->depth + 1); 26025786ec60SAlexander Duyck seq_printf(seq, " /%zu %s %s", 26039b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 260428d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 260537e826c5SDavid S. Miller fa->fa_info->fib_scope), 260628d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 260728d36e37SEric Dumazet fa->fa_type)); 2608cb7b593cSStephen Hemminger if (fa->fa_tos) 2609b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2610cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2611cb7b593cSStephen Hemminger } 2612cb7b593cSStephen Hemminger } 261319baf839SRobert Olsson 261419baf839SRobert Olsson return 0; 261519baf839SRobert Olsson } 261619baf839SRobert Olsson 2617f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 261819baf839SRobert Olsson .start = fib_trie_seq_start, 261919baf839SRobert Olsson .next = fib_trie_seq_next, 262019baf839SRobert Olsson .stop = fib_trie_seq_stop, 262119baf839SRobert Olsson .show = fib_trie_seq_show, 262219baf839SRobert Olsson }; 262319baf839SRobert Olsson 26248315f5d8SStephen Hemminger struct fib_route_iter { 26258315f5d8SStephen Hemminger struct seq_net_private p; 26268be33e95SAlexander Duyck struct fib_table *main_tb; 262735c6edacSAlexander Duyck struct key_vector *tnode; 26288315f5d8SStephen Hemminger loff_t pos; 26298315f5d8SStephen Hemminger t_key key; 26308315f5d8SStephen Hemminger }; 26318315f5d8SStephen Hemminger 263235c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, 263335c6edacSAlexander Duyck loff_t pos) 26348315f5d8SStephen Hemminger { 263535c6edacSAlexander Duyck struct key_vector *l, **tp = &iter->tnode; 26368be33e95SAlexander Duyck t_key key; 26378315f5d8SStephen Hemminger 2638fd0285a3SAlexander Duyck /* use cached location of previously found key */ 26398be33e95SAlexander Duyck if (iter->pos > 0 && pos >= iter->pos) { 26408be33e95SAlexander Duyck key = iter->key; 26418be33e95SAlexander Duyck } else { 2642fd0285a3SAlexander Duyck iter->pos = 1; 26438be33e95SAlexander Duyck key = 0; 26448315f5d8SStephen Hemminger } 26458315f5d8SStephen Hemminger 2646fd0285a3SAlexander Duyck pos -= iter->pos; 2647fd0285a3SAlexander Duyck 2648fd0285a3SAlexander Duyck while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { 26498be33e95SAlexander Duyck key = l->key + 1; 26508315f5d8SStephen Hemminger iter->pos++; 26518be33e95SAlexander Duyck l = NULL; 26528be33e95SAlexander Duyck 26538be33e95SAlexander Duyck /* handle unlikely case of a key wrap */ 26548be33e95SAlexander Duyck if (!key) 26558be33e95SAlexander Duyck break; 26568315f5d8SStephen Hemminger } 26578315f5d8SStephen Hemminger 26588315f5d8SStephen Hemminger if (l) 2659fd0285a3SAlexander Duyck iter->key = l->key; /* remember it */ 26608315f5d8SStephen Hemminger else 26618315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 26628315f5d8SStephen Hemminger 26638315f5d8SStephen Hemminger return l; 26648315f5d8SStephen Hemminger } 26658315f5d8SStephen Hemminger 26668315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 26678315f5d8SStephen Hemminger __acquires(RCU) 26688315f5d8SStephen Hemminger { 26698315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 26708315f5d8SStephen Hemminger struct fib_table *tb; 26718be33e95SAlexander Duyck struct trie *t; 26728315f5d8SStephen Hemminger 26738315f5d8SStephen Hemminger rcu_read_lock(); 26748be33e95SAlexander Duyck 26751218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 26768315f5d8SStephen Hemminger if (!tb) 26778315f5d8SStephen Hemminger return NULL; 26788315f5d8SStephen Hemminger 26798be33e95SAlexander Duyck iter->main_tb = tb; 268094d9f1c5SDavid Forster t = (struct trie *)tb->tb_data; 268194d9f1c5SDavid Forster iter->tnode = t->kv; 26828be33e95SAlexander Duyck 26838be33e95SAlexander Duyck if (*pos != 0) 26848be33e95SAlexander Duyck return fib_route_get_idx(iter, *pos); 26858be33e95SAlexander Duyck 26868be33e95SAlexander Duyck iter->pos = 0; 2687fd0285a3SAlexander Duyck iter->key = KEY_MAX; 26888be33e95SAlexander Duyck 26898315f5d8SStephen Hemminger return SEQ_START_TOKEN; 26908315f5d8SStephen Hemminger } 26918315f5d8SStephen Hemminger 26928315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 26938315f5d8SStephen Hemminger { 26948315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 269535c6edacSAlexander Duyck struct key_vector *l = NULL; 2696fd0285a3SAlexander Duyck t_key key = iter->key + 1; 26978315f5d8SStephen Hemminger 26988315f5d8SStephen Hemminger ++*pos; 26998be33e95SAlexander Duyck 27008be33e95SAlexander Duyck /* only allow key of 0 for start of sequence */ 27018be33e95SAlexander Duyck if ((v == SEQ_START_TOKEN) || key) 27028be33e95SAlexander Duyck l = leaf_walk_rcu(&iter->tnode, key); 27038be33e95SAlexander Duyck 27048be33e95SAlexander Duyck if (l) { 2705fd0285a3SAlexander Duyck iter->key = l->key; 27068315f5d8SStephen Hemminger iter->pos++; 27078be33e95SAlexander Duyck } else { 27088be33e95SAlexander Duyck iter->pos = 0; 27098315f5d8SStephen Hemminger } 27108315f5d8SStephen Hemminger 27118315f5d8SStephen Hemminger return l; 27128315f5d8SStephen Hemminger } 27138315f5d8SStephen Hemminger 27148315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 27158315f5d8SStephen Hemminger __releases(RCU) 27168315f5d8SStephen Hemminger { 27178315f5d8SStephen Hemminger rcu_read_unlock(); 27188315f5d8SStephen Hemminger } 27198315f5d8SStephen Hemminger 27205481d73fSDavid Ahern static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi) 2721cb7b593cSStephen Hemminger { 2722a034ee3cSEric Dumazet unsigned int flags = 0; 2723cb7b593cSStephen Hemminger 2724a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2725a034ee3cSEric Dumazet flags = RTF_REJECT; 27265481d73fSDavid Ahern if (fi) { 27275481d73fSDavid Ahern const struct fib_nh *nh = fib_info_nh(fi, 0); 27285481d73fSDavid Ahern 27295481d73fSDavid Ahern if (nh->fib_nh_gw4) 2730cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 27315481d73fSDavid Ahern } 273232ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2733cb7b593cSStephen Hemminger flags |= RTF_HOST; 2734cb7b593cSStephen Hemminger flags |= RTF_UP; 2735cb7b593cSStephen Hemminger return flags; 2736cb7b593cSStephen Hemminger } 2737cb7b593cSStephen Hemminger 2738cb7b593cSStephen Hemminger /* 2739cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2740cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2741cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2742cb7b593cSStephen Hemminger * legacy utilities 2743cb7b593cSStephen Hemminger */ 2744cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2745cb7b593cSStephen Hemminger { 2746654eff45SAlexander Duyck struct fib_route_iter *iter = seq->private; 2747654eff45SAlexander Duyck struct fib_table *tb = iter->main_tb; 274879e5ad2cSAlexander Duyck struct fib_alias *fa; 274935c6edacSAlexander Duyck struct key_vector *l = v; 27509b6ebad5SAlexander Duyck __be32 prefix; 2751cb7b593cSStephen Hemminger 2752cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2753cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2754cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2755cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2756cb7b593cSStephen Hemminger return 0; 2757cb7b593cSStephen Hemminger } 2758cb7b593cSStephen Hemminger 27599b6ebad5SAlexander Duyck prefix = htonl(l->key); 27609b6ebad5SAlexander Duyck 276179e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 27625481d73fSDavid Ahern struct fib_info *fi = fa->fa_info; 27639b6ebad5SAlexander Duyck __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); 2764a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2765cb7b593cSStephen Hemminger 276679e5ad2cSAlexander Duyck if ((fa->fa_type == RTN_BROADCAST) || 276779e5ad2cSAlexander Duyck (fa->fa_type == RTN_MULTICAST)) 2768cb7b593cSStephen Hemminger continue; 2769cb7b593cSStephen Hemminger 2770654eff45SAlexander Duyck if (fa->tb_id != tb->tb_id) 2771654eff45SAlexander Duyck continue; 2772654eff45SAlexander Duyck 2773652586dfSTetsuo Handa seq_setwidth(seq, 127); 2774652586dfSTetsuo Handa 27755481d73fSDavid Ahern if (fi) { 27765481d73fSDavid Ahern struct fib_nh *nh = fib_info_nh(fi, 0); 27775481d73fSDavid Ahern 27785e659e4cSPavel Emelyanov seq_printf(seq, 27795e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2780652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 27815481d73fSDavid Ahern nh->fib_nh_dev ? nh->fib_nh_dev->name : "*", 2782cb7b593cSStephen Hemminger prefix, 27835481d73fSDavid Ahern nh->fib_nh_gw4, flags, 0, 0, 2784cb7b593cSStephen Hemminger fi->fib_priority, 2785cb7b593cSStephen Hemminger mask, 2786a07f5f50SStephen Hemminger (fi->fib_advmss ? 2787a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2788cb7b593cSStephen Hemminger fi->fib_window, 2789652586dfSTetsuo Handa fi->fib_rtt >> 3); 27905481d73fSDavid Ahern } else { 27915e659e4cSPavel Emelyanov seq_printf(seq, 27925e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2793652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2794cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2795652586dfSTetsuo Handa mask, 0, 0, 0); 27965481d73fSDavid Ahern } 2797652586dfSTetsuo Handa seq_pad(seq, '\n'); 2798cb7b593cSStephen Hemminger } 2799cb7b593cSStephen Hemminger 2800cb7b593cSStephen Hemminger return 0; 2801cb7b593cSStephen Hemminger } 2802cb7b593cSStephen Hemminger 2803f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 28048315f5d8SStephen Hemminger .start = fib_route_seq_start, 28058315f5d8SStephen Hemminger .next = fib_route_seq_next, 28068315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2807cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2808cb7b593cSStephen Hemminger }; 2809cb7b593cSStephen Hemminger 281061a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 281119baf839SRobert Olsson { 2812c3506372SChristoph Hellwig if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, 2813c3506372SChristoph Hellwig sizeof(struct fib_trie_iter))) 2814cb7b593cSStephen Hemminger goto out1; 2815cb7b593cSStephen Hemminger 28163617d949SChristoph Hellwig if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, 28173617d949SChristoph Hellwig fib_triestat_seq_show, NULL)) 2818cb7b593cSStephen Hemminger goto out2; 2819cb7b593cSStephen Hemminger 2820c3506372SChristoph Hellwig if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops, 2821c3506372SChristoph Hellwig sizeof(struct fib_route_iter))) 2822cb7b593cSStephen Hemminger goto out3; 2823cb7b593cSStephen Hemminger 282419baf839SRobert Olsson return 0; 2825cb7b593cSStephen Hemminger 2826cb7b593cSStephen Hemminger out3: 2827ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2828cb7b593cSStephen Hemminger out2: 2829ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2830cb7b593cSStephen Hemminger out1: 2831cb7b593cSStephen Hemminger return -ENOMEM; 283219baf839SRobert Olsson } 283319baf839SRobert Olsson 283461a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 283519baf839SRobert Olsson { 2836ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2837ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2838ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 283919baf839SRobert Olsson } 284019baf839SRobert Olsson 284119baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2842