119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 541977f032SJiri Slaby #include <linux/bitops.h> 5519baf839SRobert Olsson #include <linux/types.h> 5619baf839SRobert Olsson #include <linux/kernel.h> 5719baf839SRobert Olsson #include <linux/mm.h> 5819baf839SRobert Olsson #include <linux/string.h> 5919baf839SRobert Olsson #include <linux/socket.h> 6019baf839SRobert Olsson #include <linux/sockios.h> 6119baf839SRobert Olsson #include <linux/errno.h> 6219baf839SRobert Olsson #include <linux/in.h> 6319baf839SRobert Olsson #include <linux/inet.h> 64cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6519baf839SRobert Olsson #include <linux/netdevice.h> 6619baf839SRobert Olsson #include <linux/if_arp.h> 6719baf839SRobert Olsson #include <linux/proc_fs.h> 682373ce1cSRobert Olsson #include <linux/rcupdate.h> 6919baf839SRobert Olsson #include <linux/skbuff.h> 7019baf839SRobert Olsson #include <linux/netlink.h> 7119baf839SRobert Olsson #include <linux/init.h> 7219baf839SRobert Olsson #include <linux/list.h> 735a0e3ad6STejun Heo #include <linux/slab.h> 74bc3b2d7fSPaul Gortmaker #include <linux/export.h> 75ffa915d0SDavid S. Miller #include <linux/vmalloc.h> 76457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7719baf839SRobert Olsson #include <net/ip.h> 7819baf839SRobert Olsson #include <net/protocol.h> 7919baf839SRobert Olsson #include <net/route.h> 8019baf839SRobert Olsson #include <net/tcp.h> 8119baf839SRobert Olsson #include <net/sock.h> 8219baf839SRobert Olsson #include <net/ip_fib.h> 838e05fd71SScott Feldman #include <net/switchdev.h> 84f6d3c192SDavid Ahern #include <trace/events/fib.h> 8519baf839SRobert Olsson #include "fib_lookup.h" 8619baf839SRobert Olsson 8706ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8819baf839SRobert Olsson 8919baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 9095f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 9119baf839SRobert Olsson 9219baf839SRobert Olsson typedef unsigned int t_key; 9319baf839SRobert Olsson 9488bae714SAlexander Duyck #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) 9564c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 9664c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 972373ce1cSRobert Olsson 9835c6edacSAlexander Duyck struct key_vector { 9941b489fdSAlexander Duyck t_key key; 10041b489fdSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 10141b489fdSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 10241b489fdSAlexander Duyck unsigned char slen; 10341b489fdSAlexander Duyck union { 10441b489fdSAlexander Duyck /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ 10579e5ad2cSAlexander Duyck struct hlist_head leaf; 10641b489fdSAlexander Duyck /* This array is valid if (pos | bits) > 0 (TNODE) */ 10735c6edacSAlexander Duyck struct key_vector __rcu *tnode[0]; 10819baf839SRobert Olsson }; 109adaf9816SAlexander Duyck }; 11019baf839SRobert Olsson 111dc35dbedSAlexander Duyck struct tnode { 11256ca2adfSAlexander Duyck struct rcu_head rcu; 1136e22d174SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 1146e22d174SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 115f23e59fbSAlexander Duyck struct key_vector __rcu *parent; 116dc35dbedSAlexander Duyck struct key_vector kv[1]; 11756ca2adfSAlexander Duyck #define tn_bits kv[0].bits 118dc35dbedSAlexander Duyck }; 119dc35dbedSAlexander Duyck 120dc35dbedSAlexander Duyck #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) 12141b489fdSAlexander Duyck #define LEAF_SIZE TNODE_SIZE(1) 12241b489fdSAlexander Duyck 12319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 12419baf839SRobert Olsson struct trie_use_stats { 12519baf839SRobert Olsson unsigned int gets; 12619baf839SRobert Olsson unsigned int backtrack; 12719baf839SRobert Olsson unsigned int semantic_match_passed; 12819baf839SRobert Olsson unsigned int semantic_match_miss; 12919baf839SRobert Olsson unsigned int null_node_hit; 1302f36895aSRobert Olsson unsigned int resize_node_skipped; 13119baf839SRobert Olsson }; 13219baf839SRobert Olsson #endif 13319baf839SRobert Olsson 13419baf839SRobert Olsson struct trie_stat { 13519baf839SRobert Olsson unsigned int totdepth; 13619baf839SRobert Olsson unsigned int maxdepth; 13719baf839SRobert Olsson unsigned int tnodes; 13819baf839SRobert Olsson unsigned int leaves; 13919baf839SRobert Olsson unsigned int nullpointers; 14093672292SStephen Hemminger unsigned int prefixes; 14106ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 14219baf839SRobert Olsson }; 14319baf839SRobert Olsson 14419baf839SRobert Olsson struct trie { 14588bae714SAlexander Duyck struct key_vector kv[1]; 14619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1478274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 14819baf839SRobert Olsson #endif 14919baf839SRobert Olsson }; 15019baf839SRobert Olsson 15188bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn); 152c3059477SJarek Poplawski static size_t tnode_free_size; 153c3059477SJarek Poplawski 154c3059477SJarek Poplawski /* 155c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 156c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 157c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 158c3059477SJarek Poplawski */ 159c3059477SJarek Poplawski static const int sync_pages = 128; 16019baf839SRobert Olsson 161e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 162bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 16319baf839SRobert Olsson 16456ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv) 16556ca2adfSAlexander Duyck { 16656ca2adfSAlexander Duyck return container_of(kv, struct tnode, kv[0]); 16756ca2adfSAlexander Duyck } 16856ca2adfSAlexander Duyck 16964c9b6fbSAlexander Duyck /* caller must hold RTNL */ 170f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) 171754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) 1720a5c0475SEric Dumazet 17364c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 174f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) 175754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) 1760a5c0475SEric Dumazet 17764c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 17835c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) 17906801916SStephen Hemminger { 180adaf9816SAlexander Duyck if (n) 181f23e59fbSAlexander Duyck rcu_assign_pointer(tn_info(n)->parent, tp); 18264c9b6fbSAlexander Duyck } 18364c9b6fbSAlexander Duyck 184f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) 18564c9b6fbSAlexander Duyck 18664c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 18764c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 18864c9b6fbSAlexander Duyck */ 1892e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn) 19064c9b6fbSAlexander Duyck { 19164c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 19206801916SStephen Hemminger } 1932373ce1cSRobert Olsson 19488bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) 19588bae714SAlexander Duyck 1962e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv) 1972e1ac88aSAlexander Duyck { 1982e1ac88aSAlexander Duyck unsigned long index = key ^ kv->key; 1992e1ac88aSAlexander Duyck 20088bae714SAlexander Duyck if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) 20188bae714SAlexander Duyck return 0; 20288bae714SAlexander Duyck 2032e1ac88aSAlexander Duyck return index >> kv->pos; 2042e1ac88aSAlexander Duyck } 2052e1ac88aSAlexander Duyck 206e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 207e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 208e9b44019SAlexander Duyck * all of the bits in that key are significant. 209e9b44019SAlexander Duyck * 210e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 211e9b44019SAlexander Duyck * 212e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 213e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 214e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 215e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 216e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 217e9b44019SAlexander Duyck * correct key path. 218e9b44019SAlexander Duyck * 219e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 220e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 221e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 222e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 223e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 224e9b44019SAlexander Duyck * 225e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 226e9b44019SAlexander Duyck * have many different meanings. 227e9b44019SAlexander Duyck * 228e9b44019SAlexander Duyck * Example: 229e9b44019SAlexander Duyck * _________________________________________________________________ 230e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 231e9b44019SAlexander Duyck * ----------------------------------------------------------------- 232e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 233e9b44019SAlexander Duyck * 234e9b44019SAlexander Duyck * _________________________________________________________________ 235e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 236e9b44019SAlexander Duyck * ----------------------------------------------------------------- 237e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 238e9b44019SAlexander Duyck * 239e9b44019SAlexander Duyck * tp->pos = 22 240e9b44019SAlexander Duyck * tp->bits = 3 241e9b44019SAlexander Duyck * n->pos = 13 242e9b44019SAlexander Duyck * n->bits = 4 243e9b44019SAlexander Duyck * 244e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 245e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 246e9b44019SAlexander Duyck * point we do not use them for anything. 247e9b44019SAlexander Duyck * 248e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 249e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 250e9b44019SAlexander Duyck * 'n' among tp's children. 251e9b44019SAlexander Duyck * 25298a384ecSXunlei Pang * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits 253e9b44019SAlexander Duyck * for the node n. 254e9b44019SAlexander Duyck * 255e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 256e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 257e9b44019SAlexander Duyck * 258e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 259e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 260e9b44019SAlexander Duyck * 26198a384ecSXunlei Pang * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown 262e9b44019SAlexander Duyck * at this point. 26319baf839SRobert Olsson */ 26419baf839SRobert Olsson 265f5026fabSDenis V. Lunev static const int halve_threshold = 25; 266f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 267345aa031SJarek Poplawski static const int halve_threshold_root = 15; 26880b71b80SJens Låås static const int inflate_threshold_root = 30; 2692373ce1cSRobert Olsson 2702373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 2712373ce1cSRobert Olsson { 2722373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 2732373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 2742373ce1cSRobert Olsson } 2752373ce1cSRobert Olsson 2762373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 2772373ce1cSRobert Olsson { 2782373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 2792373ce1cSRobert Olsson } 2802373ce1cSRobert Olsson 28137fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 28235c6edacSAlexander Duyck ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 2831de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \ 28435c6edacSAlexander Duyck ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 28537fd30f2SAlexander Duyck 28637fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 2872373ce1cSRobert Olsson { 28856ca2adfSAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 28937fd30f2SAlexander Duyck 29056ca2adfSAlexander Duyck if (!n->tn_bits) 29137fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 29237fd30f2SAlexander Duyck else 2931d5cfdb0STetsuo Handa kvfree(n); 2942373ce1cSRobert Olsson } 2952373ce1cSRobert Olsson 29656ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 297387a5487SStephen Hemminger 298dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits) 2992373ce1cSRobert Olsson { 3001de3d87bSAlexander Duyck size_t size; 3011de3d87bSAlexander Duyck 3021de3d87bSAlexander Duyck /* verify bits is within bounds */ 3031de3d87bSAlexander Duyck if (bits > TNODE_VMALLOC_MAX) 3041de3d87bSAlexander Duyck return NULL; 3051de3d87bSAlexander Duyck 3061de3d87bSAlexander Duyck /* determine size and verify it is non-zero and didn't overflow */ 3071de3d87bSAlexander Duyck size = TNODE_SIZE(1ul << bits); 3081de3d87bSAlexander Duyck 3092373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3108d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 31115be75cdSStephen Hemminger else 3127a1c8e5aSEric Dumazet return vzalloc(size); 31315be75cdSStephen Hemminger } 3142373ce1cSRobert Olsson 31535c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n) 31695f60ea3SAlexander Duyck { 3176e22d174SAlexander Duyck ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; 31895f60ea3SAlexander Duyck } 31995f60ea3SAlexander Duyck 32035c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n) 32195f60ea3SAlexander Duyck { 3226e22d174SAlexander Duyck tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; 32395f60ea3SAlexander Duyck } 32495f60ea3SAlexander Duyck 32535c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 32619baf839SRobert Olsson { 327f38b24c9SFiro Yang struct key_vector *l; 328f38b24c9SFiro Yang struct tnode *kv; 329dc35dbedSAlexander Duyck 330f38b24c9SFiro Yang kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 331dc35dbedSAlexander Duyck if (!kv) 332dc35dbedSAlexander Duyck return NULL; 333dc35dbedSAlexander Duyck 334dc35dbedSAlexander Duyck /* initialize key vector */ 335f38b24c9SFiro Yang l = kv->kv; 33664c9b6fbSAlexander Duyck l->key = key; 337e9b44019SAlexander Duyck l->pos = 0; 33864c9b6fbSAlexander Duyck l->bits = 0; 339dc35dbedSAlexander Duyck l->slen = fa->fa_slen; 34064c9b6fbSAlexander Duyck 341d5d6487cSAlexander Duyck /* link leaf to fib alias */ 34279e5ad2cSAlexander Duyck INIT_HLIST_HEAD(&l->leaf); 343d5d6487cSAlexander Duyck hlist_add_head(&fa->fa_list, &l->leaf); 344dc35dbedSAlexander Duyck 34519baf839SRobert Olsson return l; 34619baf839SRobert Olsson } 34719baf839SRobert Olsson 34835c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits) 34919baf839SRobert Olsson { 35064c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 351f38b24c9SFiro Yang struct key_vector *tn; 352f38b24c9SFiro Yang struct tnode *tnode; 35364c9b6fbSAlexander Duyck 35464c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 35564c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 35619baf839SRobert Olsson 357f38b24c9SFiro Yang tnode = tnode_alloc(bits); 358dc35dbedSAlexander Duyck if (!tnode) 359dc35dbedSAlexander Duyck return NULL; 360dc35dbedSAlexander Duyck 361f38b24c9SFiro Yang pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 362f38b24c9SFiro Yang sizeof(struct key_vector *) << bits); 363f38b24c9SFiro Yang 36495f60ea3SAlexander Duyck if (bits == KEYLENGTH) 3656e22d174SAlexander Duyck tnode->full_children = 1; 36695f60ea3SAlexander Duyck else 3676e22d174SAlexander Duyck tnode->empty_children = 1ul << bits; 368c877efb2SStephen Hemminger 369f38b24c9SFiro Yang tn = tnode->kv; 370dc35dbedSAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 371dc35dbedSAlexander Duyck tn->pos = pos; 372dc35dbedSAlexander Duyck tn->bits = bits; 373dc35dbedSAlexander Duyck tn->slen = pos; 374dc35dbedSAlexander Duyck 37519baf839SRobert Olsson return tn; 37619baf839SRobert Olsson } 37719baf839SRobert Olsson 378e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 37919baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 38019baf839SRobert Olsson */ 38135c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n) 38219baf839SRobert Olsson { 383e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 38419baf839SRobert Olsson } 38519baf839SRobert Olsson 386ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 38719baf839SRobert Olsson * Update the value of full_children and empty_children. 38819baf839SRobert Olsson */ 38935c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i, 39035c6edacSAlexander Duyck struct key_vector *n) 39119baf839SRobert Olsson { 392754baf8dSAlexander Duyck struct key_vector *chi = get_child(tn, i); 393ff181ed8SAlexander Duyck int isfull, wasfull; 39419baf839SRobert Olsson 3952e1ac88aSAlexander Duyck BUG_ON(i >= child_length(tn)); 3960c7770c7SStephen Hemminger 39795f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 39800db4124SIan Morris if (!n && chi) 39995f60ea3SAlexander Duyck empty_child_inc(tn); 40000db4124SIan Morris if (n && !chi) 40195f60ea3SAlexander Duyck empty_child_dec(tn); 40219baf839SRobert Olsson 40319baf839SRobert Olsson /* update fullChildren */ 40419baf839SRobert Olsson wasfull = tnode_full(tn, chi); 40519baf839SRobert Olsson isfull = tnode_full(tn, n); 406ff181ed8SAlexander Duyck 40719baf839SRobert Olsson if (wasfull && !isfull) 4086e22d174SAlexander Duyck tn_info(tn)->full_children--; 40919baf839SRobert Olsson else if (!wasfull && isfull) 4106e22d174SAlexander Duyck tn_info(tn)->full_children++; 41191b9a277SOlof Johansson 4125405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4135405afd1SAlexander Duyck tn->slen = n->slen; 4145405afd1SAlexander Duyck 41541b489fdSAlexander Duyck rcu_assign_pointer(tn->tnode[i], n); 41619baf839SRobert Olsson } 41719baf839SRobert Olsson 41835c6edacSAlexander Duyck static void update_children(struct key_vector *tn) 41969fa57b1SAlexander Duyck { 42069fa57b1SAlexander Duyck unsigned long i; 42169fa57b1SAlexander Duyck 42269fa57b1SAlexander Duyck /* update all of the child parent pointers */ 4232e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 424754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 42569fa57b1SAlexander Duyck 42669fa57b1SAlexander Duyck if (!inode) 42769fa57b1SAlexander Duyck continue; 42869fa57b1SAlexander Duyck 42969fa57b1SAlexander Duyck /* Either update the children of a tnode that 43069fa57b1SAlexander Duyck * already belongs to us or update the child 43169fa57b1SAlexander Duyck * to point to ourselves. 43269fa57b1SAlexander Duyck */ 43369fa57b1SAlexander Duyck if (node_parent(inode) == tn) 43469fa57b1SAlexander Duyck update_children(inode); 43569fa57b1SAlexander Duyck else 43669fa57b1SAlexander Duyck node_set_parent(inode, tn); 43769fa57b1SAlexander Duyck } 43869fa57b1SAlexander Duyck } 43969fa57b1SAlexander Duyck 44088bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key, 44188bae714SAlexander Duyck struct key_vector *n) 442836a0123SAlexander Duyck { 44388bae714SAlexander Duyck if (IS_TRIE(tp)) 44488bae714SAlexander Duyck rcu_assign_pointer(tp->tnode[0], n); 445836a0123SAlexander Duyck else 44688bae714SAlexander Duyck put_child(tp, get_index(key, tp), n); 447836a0123SAlexander Duyck } 448836a0123SAlexander Duyck 44935c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn) 4500a5c0475SEric Dumazet { 45156ca2adfSAlexander Duyck tn_info(tn)->rcu.next = NULL; 4520a5c0475SEric Dumazet } 453fc86a93bSAlexander Duyck 45435c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn, 45535c6edacSAlexander Duyck struct key_vector *n) 456fc86a93bSAlexander Duyck { 45756ca2adfSAlexander Duyck tn_info(n)->rcu.next = tn_info(tn)->rcu.next; 45856ca2adfSAlexander Duyck tn_info(tn)->rcu.next = &tn_info(n)->rcu; 459fc86a93bSAlexander Duyck } 460fc86a93bSAlexander Duyck 46135c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn) 462fc86a93bSAlexander Duyck { 46356ca2adfSAlexander Duyck struct callback_head *head = &tn_info(tn)->rcu; 464fc86a93bSAlexander Duyck 465fc86a93bSAlexander Duyck while (head) { 466fc86a93bSAlexander Duyck head = head->next; 46741b489fdSAlexander Duyck tnode_free_size += TNODE_SIZE(1ul << tn->bits); 46837fd30f2SAlexander Duyck node_free(tn); 469fc86a93bSAlexander Duyck 47056ca2adfSAlexander Duyck tn = container_of(head, struct tnode, rcu)->kv; 471fc86a93bSAlexander Duyck } 472fc86a93bSAlexander Duyck 473fc86a93bSAlexander Duyck if (tnode_free_size >= PAGE_SIZE * sync_pages) { 474fc86a93bSAlexander Duyck tnode_free_size = 0; 475fc86a93bSAlexander Duyck synchronize_rcu(); 476fc86a93bSAlexander Duyck } 4770a5c0475SEric Dumazet } 4780a5c0475SEric Dumazet 47988bae714SAlexander Duyck static struct key_vector *replace(struct trie *t, 48035c6edacSAlexander Duyck struct key_vector *oldtnode, 48135c6edacSAlexander Duyck struct key_vector *tn) 48269fa57b1SAlexander Duyck { 48335c6edacSAlexander Duyck struct key_vector *tp = node_parent(oldtnode); 48469fa57b1SAlexander Duyck unsigned long i; 48569fa57b1SAlexander Duyck 48669fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 48769fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 48888bae714SAlexander Duyck put_child_root(tp, tn->key, tn); 48969fa57b1SAlexander Duyck 49069fa57b1SAlexander Duyck /* update all of the child parent pointers */ 49169fa57b1SAlexander Duyck update_children(tn); 49269fa57b1SAlexander Duyck 49369fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 49469fa57b1SAlexander Duyck tnode_free(oldtnode); 49569fa57b1SAlexander Duyck 49669fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 4972e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 498754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 49969fa57b1SAlexander Duyck 50069fa57b1SAlexander Duyck /* resize child node */ 50169fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 50288bae714SAlexander Duyck tn = resize(t, inode); 50369fa57b1SAlexander Duyck } 5048d8e810cSAlexander Duyck 50588bae714SAlexander Duyck return tp; 50669fa57b1SAlexander Duyck } 50769fa57b1SAlexander Duyck 50888bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t, 50935c6edacSAlexander Duyck struct key_vector *oldtnode) 51019baf839SRobert Olsson { 51135c6edacSAlexander Duyck struct key_vector *tn; 51269fa57b1SAlexander Duyck unsigned long i; 513e9b44019SAlexander Duyck t_key m; 51419baf839SRobert Olsson 5150c7770c7SStephen Hemminger pr_debug("In inflate\n"); 51619baf839SRobert Olsson 517e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5182f80b3c8SRobert Olsson if (!tn) 5198d8e810cSAlexander Duyck goto notnode; 5202f36895aSRobert Olsson 52169fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 52269fa57b1SAlexander Duyck tnode_free_init(oldtnode); 52369fa57b1SAlexander Duyck 52412c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 52512c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 52612c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 52712c081a5SAlexander Duyck * nodes. 5282f36895aSRobert Olsson */ 5292e1ac88aSAlexander Duyck for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { 530754baf8dSAlexander Duyck struct key_vector *inode = get_child(oldtnode, --i); 53135c6edacSAlexander Duyck struct key_vector *node0, *node1; 53269fa57b1SAlexander Duyck unsigned long j, k; 53319baf839SRobert Olsson 53419baf839SRobert Olsson /* An empty child */ 53551456b29SIan Morris if (!inode) 53619baf839SRobert Olsson continue; 53719baf839SRobert Olsson 53819baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 539adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 540e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 54119baf839SRobert Olsson continue; 54219baf839SRobert Olsson } 54319baf839SRobert Olsson 54469fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 54569fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 54669fa57b1SAlexander Duyck 54719baf839SRobert Olsson /* An internal node with two children */ 54819baf839SRobert Olsson if (inode->bits == 1) { 549754baf8dSAlexander Duyck put_child(tn, 2 * i + 1, get_child(inode, 1)); 550754baf8dSAlexander Duyck put_child(tn, 2 * i, get_child(inode, 0)); 55191b9a277SOlof Johansson continue; 55219baf839SRobert Olsson } 55319baf839SRobert Olsson 55419baf839SRobert Olsson /* We will replace this node 'inode' with two new 55512c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 55619baf839SRobert Olsson * original children. The two new nodes will have 55719baf839SRobert Olsson * a position one bit further down the key and this 55819baf839SRobert Olsson * means that the "significant" part of their keys 55919baf839SRobert Olsson * (see the discussion near the top of this file) 56019baf839SRobert Olsson * will differ by one bit, which will be "0" in 56112c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 56219baf839SRobert Olsson * moving the key position by one step, the bit that 56319baf839SRobert Olsson * we are moving away from - the bit at position 56412c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 56512c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 56619baf839SRobert Olsson * two new keys. 56719baf839SRobert Olsson */ 56812c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 56912c081a5SAlexander Duyck if (!node1) 57012c081a5SAlexander Duyck goto nomem; 57169fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 57219baf839SRobert Olsson 57369fa57b1SAlexander Duyck tnode_free_append(tn, node1); 57412c081a5SAlexander Duyck if (!node0) 57512c081a5SAlexander Duyck goto nomem; 57612c081a5SAlexander Duyck tnode_free_append(tn, node0); 5772f36895aSRobert Olsson 57812c081a5SAlexander Duyck /* populate child pointers in new nodes */ 5792e1ac88aSAlexander Duyck for (k = child_length(inode), j = k / 2; j;) { 580754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 581754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 582754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 583754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 58419baf839SRobert Olsson } 585ff181ed8SAlexander Duyck 58612c081a5SAlexander Duyck /* link new nodes to parent */ 58712c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 58812c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 58912c081a5SAlexander Duyck 59012c081a5SAlexander Duyck /* link parent to nodes */ 59112c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 59212c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 59312c081a5SAlexander Duyck } 59412c081a5SAlexander Duyck 59569fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 5968d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 597ff181ed8SAlexander Duyck nomem: 598fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 599fc86a93bSAlexander Duyck tnode_free(tn); 6008d8e810cSAlexander Duyck notnode: 6018d8e810cSAlexander Duyck return NULL; 602ff181ed8SAlexander Duyck } 603ff181ed8SAlexander Duyck 60488bae714SAlexander Duyck static struct key_vector *halve(struct trie *t, 60535c6edacSAlexander Duyck struct key_vector *oldtnode) 60619baf839SRobert Olsson { 60735c6edacSAlexander Duyck struct key_vector *tn; 60812c081a5SAlexander Duyck unsigned long i; 60919baf839SRobert Olsson 6100c7770c7SStephen Hemminger pr_debug("In halve\n"); 61119baf839SRobert Olsson 612e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6132f80b3c8SRobert Olsson if (!tn) 6148d8e810cSAlexander Duyck goto notnode; 6152f36895aSRobert Olsson 61669fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 61769fa57b1SAlexander Duyck tnode_free_init(oldtnode); 61869fa57b1SAlexander Duyck 61912c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 62012c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 62112c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 62212c081a5SAlexander Duyck * nodes. 6232f36895aSRobert Olsson */ 6242e1ac88aSAlexander Duyck for (i = child_length(oldtnode); i;) { 625754baf8dSAlexander Duyck struct key_vector *node1 = get_child(oldtnode, --i); 626754baf8dSAlexander Duyck struct key_vector *node0 = get_child(oldtnode, --i); 62735c6edacSAlexander Duyck struct key_vector *inode; 6282f36895aSRobert Olsson 62912c081a5SAlexander Duyck /* At least one of the children is empty */ 63012c081a5SAlexander Duyck if (!node1 || !node0) { 63112c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 63212c081a5SAlexander Duyck continue; 63312c081a5SAlexander Duyck } 6342f36895aSRobert Olsson 6352f36895aSRobert Olsson /* Two nonempty children */ 63612c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 6378d8e810cSAlexander Duyck if (!inode) 6388d8e810cSAlexander Duyck goto nomem; 63912c081a5SAlexander Duyck tnode_free_append(tn, inode); 6402f80b3c8SRobert Olsson 64112c081a5SAlexander Duyck /* initialize pointers out of node */ 64212c081a5SAlexander Duyck put_child(inode, 1, node1); 64312c081a5SAlexander Duyck put_child(inode, 0, node0); 64412c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 64512c081a5SAlexander Duyck 64612c081a5SAlexander Duyck /* link parent to node */ 64712c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6482f36895aSRobert Olsson } 6492f36895aSRobert Olsson 65069fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6518d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 6528d8e810cSAlexander Duyck nomem: 6538d8e810cSAlexander Duyck /* all pointers should be clean so we are done */ 6548d8e810cSAlexander Duyck tnode_free(tn); 6558d8e810cSAlexander Duyck notnode: 6568d8e810cSAlexander Duyck return NULL; 6572f80b3c8SRobert Olsson } 65819baf839SRobert Olsson 65988bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t, 66088bae714SAlexander Duyck struct key_vector *oldtnode) 66195f60ea3SAlexander Duyck { 66235c6edacSAlexander Duyck struct key_vector *n, *tp; 66395f60ea3SAlexander Duyck unsigned long i; 66495f60ea3SAlexander Duyck 66595f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 6662e1ac88aSAlexander Duyck for (n = NULL, i = child_length(oldtnode); !n && i;) 667754baf8dSAlexander Duyck n = get_child(oldtnode, --i); 66895f60ea3SAlexander Duyck 66995f60ea3SAlexander Duyck /* compress one level */ 67095f60ea3SAlexander Duyck tp = node_parent(oldtnode); 67188bae714SAlexander Duyck put_child_root(tp, oldtnode->key, n); 67295f60ea3SAlexander Duyck node_set_parent(n, tp); 67395f60ea3SAlexander Duyck 67495f60ea3SAlexander Duyck /* drop dead node */ 67595f60ea3SAlexander Duyck node_free(oldtnode); 67688bae714SAlexander Duyck 67788bae714SAlexander Duyck return tp; 67895f60ea3SAlexander Duyck } 67995f60ea3SAlexander Duyck 68035c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn) 6815405afd1SAlexander Duyck { 6825405afd1SAlexander Duyck unsigned char slen = tn->pos; 6835405afd1SAlexander Duyck unsigned long stride, i; 6845405afd1SAlexander Duyck 6855405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 6865405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 6875405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 6885405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 6895405afd1SAlexander Duyck */ 6902e1ac88aSAlexander Duyck for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { 691754baf8dSAlexander Duyck struct key_vector *n = get_child(tn, i); 6925405afd1SAlexander Duyck 6935405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 6945405afd1SAlexander Duyck continue; 6955405afd1SAlexander Duyck 6965405afd1SAlexander Duyck /* update stride and slen based on new value */ 6975405afd1SAlexander Duyck stride <<= (n->slen - slen); 6985405afd1SAlexander Duyck slen = n->slen; 6995405afd1SAlexander Duyck i &= ~(stride - 1); 7005405afd1SAlexander Duyck 7015405afd1SAlexander Duyck /* if slen covers all but the last bit we can stop here 7025405afd1SAlexander Duyck * there will be nothing longer than that since only node 7035405afd1SAlexander Duyck * 0 and 1 << (bits - 1) could have that as their suffix 7045405afd1SAlexander Duyck * length. 7055405afd1SAlexander Duyck */ 7065405afd1SAlexander Duyck if ((slen + 1) >= (tn->pos + tn->bits)) 7075405afd1SAlexander Duyck break; 7085405afd1SAlexander Duyck } 7095405afd1SAlexander Duyck 7105405afd1SAlexander Duyck tn->slen = slen; 7115405afd1SAlexander Duyck 7125405afd1SAlexander Duyck return slen; 7135405afd1SAlexander Duyck } 7145405afd1SAlexander Duyck 715f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 716cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 717cf3637bbSAlexander Duyck * Telecommunications, page 6: 718cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 719cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 720cf3637bbSAlexander Duyck * 721cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 722cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 7232e1ac88aSAlexander Duyck * child_length() and instead of multiplying by 2 (since the 724cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 725cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 726cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 727cf3637bbSAlexander Duyck * 7282e1ac88aSAlexander Duyck * The left-hand side may look a bit weird: child_length(tn) 729cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 730cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 731cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 732cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 733cf3637bbSAlexander Duyck * we just count them one extra time here. 734cf3637bbSAlexander Duyck * 735cf3637bbSAlexander Duyck * A clearer way to write this would be: 736cf3637bbSAlexander Duyck * 737cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 7382e1ac88aSAlexander Duyck * not_to_be_doubled = child_length(tn) - tn->empty_children - 739cf3637bbSAlexander Duyck * tn->full_children; 740cf3637bbSAlexander Duyck * 7412e1ac88aSAlexander Duyck * new_child_length = child_length(tn) * 2; 742cf3637bbSAlexander Duyck * 743cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 744cf3637bbSAlexander Duyck * new_child_length; 745cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 746cf3637bbSAlexander Duyck * 747cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 748cf3637bbSAlexander Duyck * 749cf3637bbSAlexander Duyck * anyway, 750cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 751cf3637bbSAlexander Duyck * inflate_threshold 752cf3637bbSAlexander Duyck * 753cf3637bbSAlexander Duyck * avoid a division: 754cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 755cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 756cf3637bbSAlexander Duyck * 757cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 7582e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 759cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 760cf3637bbSAlexander Duyck * 761cf3637bbSAlexander Duyck * expand new_child_length: 7622e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 763cf3637bbSAlexander Duyck * tn->full_children) >= 7642e1ac88aSAlexander Duyck * inflate_threshold * child_length(tn) * 2 765cf3637bbSAlexander Duyck * 766cf3637bbSAlexander Duyck * shorten again: 7672e1ac88aSAlexander Duyck * 50 * (tn->full_children + child_length(tn) - 768cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 7692e1ac88aSAlexander Duyck * child_length(tn) 770cf3637bbSAlexander Duyck * 771cf3637bbSAlexander Duyck */ 77235c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) 773f05a4819SAlexander Duyck { 7742e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 775f05a4819SAlexander Duyck unsigned long threshold = used; 776cf3637bbSAlexander Duyck 777cf3637bbSAlexander Duyck /* Keep root node larger */ 77888bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; 7796e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 7806e22d174SAlexander Duyck used += tn_info(tn)->full_children; 781cf3637bbSAlexander Duyck 78295f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 78395f60ea3SAlexander Duyck 78495f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 785cf3637bbSAlexander Duyck } 786cf3637bbSAlexander Duyck 78735c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) 788f05a4819SAlexander Duyck { 7892e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 790f05a4819SAlexander Duyck unsigned long threshold = used; 791cf3637bbSAlexander Duyck 792f05a4819SAlexander Duyck /* Keep root node larger */ 79388bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; 7946e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 795f05a4819SAlexander Duyck 79695f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 79795f60ea3SAlexander Duyck 79895f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 79995f60ea3SAlexander Duyck } 80095f60ea3SAlexander Duyck 80135c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn) 80295f60ea3SAlexander Duyck { 8032e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 80495f60ea3SAlexander Duyck 8056e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 80695f60ea3SAlexander Duyck 80795f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 8086e22d174SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) 80995f60ea3SAlexander Duyck used -= KEY_MAX; 81095f60ea3SAlexander Duyck 81195f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 81295f60ea3SAlexander Duyck return used < 2; 813f05a4819SAlexander Duyck } 814f05a4819SAlexander Duyck 815f05a4819SAlexander Duyck #define MAX_WORK 10 81688bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn) 817f05a4819SAlexander Duyck { 8188d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8198d8e810cSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 8208d8e810cSAlexander Duyck #endif 82135c6edacSAlexander Duyck struct key_vector *tp = node_parent(tn); 82288bae714SAlexander Duyck unsigned long cindex = get_index(tn->key, tp); 823a80e89d4SAlexander Duyck int max_work = MAX_WORK; 824f05a4819SAlexander Duyck 825f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 826f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 827f05a4819SAlexander Duyck 828ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 829ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 830ff181ed8SAlexander Duyck * thing without us interfering 831ff181ed8SAlexander Duyck */ 83288bae714SAlexander Duyck BUG_ON(tn != get_child(tp, cindex)); 833ff181ed8SAlexander Duyck 834f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 835f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 836f05a4819SAlexander Duyck */ 837b6f15f82SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 83888bae714SAlexander Duyck tp = inflate(t, tn); 83988bae714SAlexander Duyck if (!tp) { 840cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8418d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 842cf3637bbSAlexander Duyck #endif 843cf3637bbSAlexander Duyck break; 844cf3637bbSAlexander Duyck } 845ff181ed8SAlexander Duyck 846b6f15f82SAlexander Duyck max_work--; 84788bae714SAlexander Duyck tn = get_child(tp, cindex); 848cf3637bbSAlexander Duyck } 849cf3637bbSAlexander Duyck 850b6f15f82SAlexander Duyck /* update parent in case inflate failed */ 851b6f15f82SAlexander Duyck tp = node_parent(tn); 852b6f15f82SAlexander Duyck 853cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 854cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 855b6f15f82SAlexander Duyck return tp; 856cf3637bbSAlexander Duyck 857f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 858cf3637bbSAlexander Duyck * node is above threshold. 859cf3637bbSAlexander Duyck */ 860b6f15f82SAlexander Duyck while (should_halve(tp, tn) && max_work) { 86188bae714SAlexander Duyck tp = halve(t, tn); 86288bae714SAlexander Duyck if (!tp) { 863cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8648d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 865cf3637bbSAlexander Duyck #endif 866cf3637bbSAlexander Duyck break; 867cf3637bbSAlexander Duyck } 868cf3637bbSAlexander Duyck 869b6f15f82SAlexander Duyck max_work--; 87088bae714SAlexander Duyck tn = get_child(tp, cindex); 871ff181ed8SAlexander Duyck } 872cf3637bbSAlexander Duyck 873cf3637bbSAlexander Duyck /* Only one child remains */ 87488bae714SAlexander Duyck if (should_collapse(tn)) 87588bae714SAlexander Duyck return collapse(t, tn); 87688bae714SAlexander Duyck 877b6f15f82SAlexander Duyck /* update parent in case halve failed */ 87888bae714SAlexander Duyck tp = node_parent(tn); 8795405afd1SAlexander Duyck 8805405afd1SAlexander Duyck /* Return if at least one deflate was run */ 8815405afd1SAlexander Duyck if (max_work != MAX_WORK) 88288bae714SAlexander Duyck return tp; 8835405afd1SAlexander Duyck 8845405afd1SAlexander Duyck /* push the suffix length to the parent node */ 8855405afd1SAlexander Duyck if (tn->slen > tn->pos) { 8865405afd1SAlexander Duyck unsigned char slen = update_suffix(tn); 8875405afd1SAlexander Duyck 88888bae714SAlexander Duyck if (slen > tp->slen) 8895405afd1SAlexander Duyck tp->slen = slen; 890cf3637bbSAlexander Duyck } 8918d8e810cSAlexander Duyck 89288bae714SAlexander Duyck return tp; 893cf3637bbSAlexander Duyck } 894cf3637bbSAlexander Duyck 89535c6edacSAlexander Duyck static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) 89619baf839SRobert Olsson { 89788bae714SAlexander Duyck while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { 8985405afd1SAlexander Duyck if (update_suffix(tp) > l->slen) 8995405afd1SAlexander Duyck break; 9005405afd1SAlexander Duyck tp = node_parent(tp); 9015405afd1SAlexander Duyck } 9025405afd1SAlexander Duyck } 9035405afd1SAlexander Duyck 90435c6edacSAlexander Duyck static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) 9055405afd1SAlexander Duyck { 9065405afd1SAlexander Duyck /* if this is a new leaf then tn will be NULL and we can sort 9075405afd1SAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 9085405afd1SAlexander Duyck */ 90988bae714SAlexander Duyck while (tn->slen < l->slen) { 9105405afd1SAlexander Duyck tn->slen = l->slen; 9115405afd1SAlexander Duyck tn = node_parent(tn); 9125405afd1SAlexander Duyck } 9135405afd1SAlexander Duyck } 9145405afd1SAlexander Duyck 9152373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 91635c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t, 91735c6edacSAlexander Duyck struct key_vector **tp, u32 key) 91819baf839SRobert Olsson { 91988bae714SAlexander Duyck struct key_vector *pn, *n = t->kv; 92088bae714SAlexander Duyck unsigned long index = 0; 92119baf839SRobert Olsson 92288bae714SAlexander Duyck do { 92388bae714SAlexander Duyck pn = n; 92488bae714SAlexander Duyck n = get_child_rcu(n, index); 92588bae714SAlexander Duyck 92688bae714SAlexander Duyck if (!n) 92788bae714SAlexander Duyck break; 92888bae714SAlexander Duyck 92988bae714SAlexander Duyck index = get_cindex(key, n); 93019baf839SRobert Olsson 931939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 932939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 933939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 934939afb06SAlexander Duyck * is the difference between the key and this value. From 935939afb06SAlexander Duyck * this we can actually derive several pieces of data. 936d4a975e8SAlexander Duyck * if (index >= (1ul << bits)) 937939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 938b3832117SAlexander Duyck * else 939b3832117SAlexander Duyck * we know the value is cindex 940d4a975e8SAlexander Duyck * 941d4a975e8SAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 942d4a975e8SAlexander Duyck * fact that we can only allocate a node with 32 bits if a 943d4a975e8SAlexander Duyck * long is greater than 32 bits. 944939afb06SAlexander Duyck */ 945d4a975e8SAlexander Duyck if (index >= (1ul << n->bits)) { 946d4a975e8SAlexander Duyck n = NULL; 947d4a975e8SAlexander Duyck break; 948d4a975e8SAlexander Duyck } 949939afb06SAlexander Duyck 95088bae714SAlexander Duyck /* keep searching until we find a perfect match leaf or NULL */ 95188bae714SAlexander Duyck } while (IS_TNODE(n)); 952939afb06SAlexander Duyck 95335c6edacSAlexander Duyck *tp = pn; 954d4a975e8SAlexander Duyck 955939afb06SAlexander Duyck return n; 95619baf839SRobert Olsson } 95719baf839SRobert Olsson 95802525368SAlexander Duyck /* Return the first fib alias matching TOS with 95902525368SAlexander Duyck * priority less than or equal to PRIO. 96002525368SAlexander Duyck */ 96179e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, 9620b65bd97SAlexander Duyck u8 tos, u32 prio, u32 tb_id) 96302525368SAlexander Duyck { 96402525368SAlexander Duyck struct fib_alias *fa; 96502525368SAlexander Duyck 96602525368SAlexander Duyck if (!fah) 96702525368SAlexander Duyck return NULL; 96802525368SAlexander Duyck 96956315f9eSAlexander Duyck hlist_for_each_entry(fa, fah, fa_list) { 97079e5ad2cSAlexander Duyck if (fa->fa_slen < slen) 97179e5ad2cSAlexander Duyck continue; 97279e5ad2cSAlexander Duyck if (fa->fa_slen != slen) 97379e5ad2cSAlexander Duyck break; 9740b65bd97SAlexander Duyck if (fa->tb_id > tb_id) 9750b65bd97SAlexander Duyck continue; 9760b65bd97SAlexander Duyck if (fa->tb_id != tb_id) 9770b65bd97SAlexander Duyck break; 97802525368SAlexander Duyck if (fa->fa_tos > tos) 97902525368SAlexander Duyck continue; 98002525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 98102525368SAlexander Duyck return fa; 98202525368SAlexander Duyck } 98302525368SAlexander Duyck 98402525368SAlexander Duyck return NULL; 98502525368SAlexander Duyck } 98602525368SAlexander Duyck 98735c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn) 98819baf839SRobert Olsson { 98988bae714SAlexander Duyck while (!IS_TRIE(tn)) 99088bae714SAlexander Duyck tn = resize(t, tn); 99119baf839SRobert Olsson } 99219baf839SRobert Olsson 99335c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp, 994d5d6487cSAlexander Duyck struct fib_alias *new, t_key key) 99519baf839SRobert Olsson { 99635c6edacSAlexander Duyck struct key_vector *n, *l; 997836a0123SAlexander Duyck 998d5d6487cSAlexander Duyck l = leaf_new(key, new); 99979e5ad2cSAlexander Duyck if (!l) 10008d8e810cSAlexander Duyck goto noleaf; 1001d5d6487cSAlexander Duyck 1002d5d6487cSAlexander Duyck /* retrieve child from parent node */ 1003754baf8dSAlexander Duyck n = get_child(tp, get_index(key, tp)); 100419baf839SRobert Olsson 1005836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1006836a0123SAlexander Duyck * 100719baf839SRobert Olsson * Add a new tnode here 100819baf839SRobert Olsson * first tnode need some special handling 1009836a0123SAlexander Duyck * leaves us in position for handling as case 3 101019baf839SRobert Olsson */ 101119baf839SRobert Olsson if (n) { 101235c6edacSAlexander Duyck struct key_vector *tn; 1013f835e471SRobert Olsson 1014e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 10158d8e810cSAlexander Duyck if (!tn) 10168d8e810cSAlexander Duyck goto notnode; 101719baf839SRobert Olsson 1018836a0123SAlexander Duyck /* initialize routes out of node */ 1019836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1020836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 102119baf839SRobert Olsson 1022836a0123SAlexander Duyck /* start adding routes into the node */ 102388bae714SAlexander Duyck put_child_root(tp, key, tn); 1024836a0123SAlexander Duyck node_set_parent(n, tn); 102519baf839SRobert Olsson 1026836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1027e962f302SAlexander Duyck tp = tn; 102819baf839SRobert Olsson } 102991b9a277SOlof Johansson 1030836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1031836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 103288bae714SAlexander Duyck put_child_root(tp, key, l); 10337b85576dSJarek Poplawski trie_rebalance(t, tp); 1034d5d6487cSAlexander Duyck 1035d5d6487cSAlexander Duyck return 0; 10368d8e810cSAlexander Duyck notnode: 10378d8e810cSAlexander Duyck node_free(l); 10388d8e810cSAlexander Duyck noleaf: 10398d8e810cSAlexander Duyck return -ENOMEM; 1040d5d6487cSAlexander Duyck } 1041d5d6487cSAlexander Duyck 104235c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp, 104335c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *new, 1044d5d6487cSAlexander Duyck struct fib_alias *fa, t_key key) 1045d5d6487cSAlexander Duyck { 1046d5d6487cSAlexander Duyck if (!l) 1047d5d6487cSAlexander Duyck return fib_insert_node(t, tp, new, key); 1048d5d6487cSAlexander Duyck 1049d5d6487cSAlexander Duyck if (fa) { 1050d5d6487cSAlexander Duyck hlist_add_before_rcu(&new->fa_list, &fa->fa_list); 1051836a0123SAlexander Duyck } else { 1052d5d6487cSAlexander Duyck struct fib_alias *last; 1053d5d6487cSAlexander Duyck 1054d5d6487cSAlexander Duyck hlist_for_each_entry(last, &l->leaf, fa_list) { 1055d5d6487cSAlexander Duyck if (new->fa_slen < last->fa_slen) 1056d5d6487cSAlexander Duyck break; 10570b65bd97SAlexander Duyck if ((new->fa_slen == last->fa_slen) && 10580b65bd97SAlexander Duyck (new->tb_id > last->tb_id)) 10590b65bd97SAlexander Duyck break; 1060d5d6487cSAlexander Duyck fa = last; 1061836a0123SAlexander Duyck } 1062836a0123SAlexander Duyck 1063d5d6487cSAlexander Duyck if (fa) 1064d5d6487cSAlexander Duyck hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); 1065d5d6487cSAlexander Duyck else 1066d5d6487cSAlexander Duyck hlist_add_head_rcu(&new->fa_list, &l->leaf); 106719baf839SRobert Olsson } 106819baf839SRobert Olsson 1069d5d6487cSAlexander Duyck /* if we added to the tail node then we need to update slen */ 1070d5d6487cSAlexander Duyck if (l->slen < new->fa_slen) { 1071d5d6487cSAlexander Duyck l->slen = new->fa_slen; 1072d5d6487cSAlexander Duyck leaf_push_suffix(tp, l); 1073d5d6487cSAlexander Duyck } 1074d5d6487cSAlexander Duyck 1075d5d6487cSAlexander Duyck return 0; 1076d5d6487cSAlexander Duyck } 1077d5d6487cSAlexander Duyck 1078d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 107916c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 108019baf839SRobert Olsson { 108119baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 108219baf839SRobert Olsson struct fib_alias *fa, *new_fa; 108335c6edacSAlexander Duyck struct key_vector *l, *tp; 1084b93e1fa7SGuillaume Nault u16 nlflags = NLM_F_EXCL; 108519baf839SRobert Olsson struct fib_info *fi; 108679e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 108779e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 10884e902c57SThomas Graf u8 tos = cfg->fc_tos; 1089d4a975e8SAlexander Duyck u32 key; 109019baf839SRobert Olsson int err; 109119baf839SRobert Olsson 10925786ec60SAlexander Duyck if (plen > KEYLENGTH) 109319baf839SRobert Olsson return -EINVAL; 109419baf839SRobert Olsson 10954e902c57SThomas Graf key = ntohl(cfg->fc_dst); 109619baf839SRobert Olsson 10972dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 109819baf839SRobert Olsson 1099d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 110019baf839SRobert Olsson return -EINVAL; 110119baf839SRobert Olsson 11024e902c57SThomas Graf fi = fib_create_info(cfg); 11034e902c57SThomas Graf if (IS_ERR(fi)) { 11044e902c57SThomas Graf err = PTR_ERR(fi); 110519baf839SRobert Olsson goto err; 11064e902c57SThomas Graf } 110719baf839SRobert Olsson 1108d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 11090b65bd97SAlexander Duyck fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, 11100b65bd97SAlexander Duyck tb->tb_id) : NULL; 111119baf839SRobert Olsson 111219baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 111319baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 111419baf839SRobert Olsson * exists or to the node before which we will insert new one. 111519baf839SRobert Olsson * 111619baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 111756315f9eSAlexander Duyck * insert to the tail of the section matching the suffix length 111856315f9eSAlexander Duyck * of the new alias. 111919baf839SRobert Olsson */ 112019baf839SRobert Olsson 1121936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1122936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1123936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 112419baf839SRobert Olsson 112519baf839SRobert Olsson err = -EEXIST; 11264e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 112719baf839SRobert Olsson goto out; 112819baf839SRobert Olsson 1129b93e1fa7SGuillaume Nault nlflags &= ~NLM_F_EXCL; 1130b93e1fa7SGuillaume Nault 1131936f6f8eSJulian Anastasov /* We have 2 goals: 1132936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1133936f6f8eSJulian Anastasov * duplicate routes 1134936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1135936f6f8eSJulian Anastasov */ 1136936f6f8eSJulian Anastasov fa_match = NULL; 1137936f6f8eSJulian Anastasov fa_first = fa; 113856315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 11390b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 11400b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 11410b65bd97SAlexander Duyck (fa->fa_tos != tos)) 1142936f6f8eSJulian Anastasov break; 1143936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1144936f6f8eSJulian Anastasov break; 1145936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1146936f6f8eSJulian Anastasov fa->fa_info == fi) { 1147936f6f8eSJulian Anastasov fa_match = fa; 1148936f6f8eSJulian Anastasov break; 1149936f6f8eSJulian Anastasov } 1150936f6f8eSJulian Anastasov } 1151936f6f8eSJulian Anastasov 11524e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 115319baf839SRobert Olsson struct fib_info *fi_drop; 115419baf839SRobert Olsson u8 state; 115519baf839SRobert Olsson 1156b93e1fa7SGuillaume Nault nlflags |= NLM_F_REPLACE; 1157936f6f8eSJulian Anastasov fa = fa_first; 1158936f6f8eSJulian Anastasov if (fa_match) { 1159936f6f8eSJulian Anastasov if (fa == fa_match) 1160936f6f8eSJulian Anastasov err = 0; 11616725033fSJoonwoo Park goto out; 1162936f6f8eSJulian Anastasov } 11632373ce1cSRobert Olsson err = -ENOBUFS; 1164e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 116551456b29SIan Morris if (!new_fa) 11662373ce1cSRobert Olsson goto out; 116719baf839SRobert Olsson 116819baf839SRobert Olsson fi_drop = fa->fa_info; 11692373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 11702373ce1cSRobert Olsson new_fa->fa_info = fi; 11714e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 117219baf839SRobert Olsson state = fa->fa_state; 1173936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 11749b6ebad5SAlexander Duyck new_fa->fa_slen = fa->fa_slen; 1175d4e64c29SMichal Kubeček new_fa->tb_id = tb->tb_id; 11762392debcSJulian Anastasov new_fa->fa_default = -1; 117719baf839SRobert Olsson 1178ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, 11798e05fd71SScott Feldman new_fa->fa_tos, 11808e05fd71SScott Feldman cfg->fc_type, 1181f8f21471SScott Feldman cfg->fc_nlflags, 11828e05fd71SScott Feldman tb->tb_id); 11838e05fd71SScott Feldman if (err) { 1184ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 11858e05fd71SScott Feldman kmem_cache_free(fn_alias_kmem, new_fa); 11868e05fd71SScott Feldman goto out; 11878e05fd71SScott Feldman } 11888e05fd71SScott Feldman 118956315f9eSAlexander Duyck hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); 11908e05fd71SScott Feldman 11912373ce1cSRobert Olsson alias_free_mem_rcu(fa); 119219baf839SRobert Olsson 119319baf839SRobert Olsson fib_release_info(fi_drop); 119419baf839SRobert Olsson if (state & FA_S_ACCESSED) 11954ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1196b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1197b93e1fa7SGuillaume Nault tb->tb_id, &cfg->fc_nlinfo, nlflags); 119819baf839SRobert Olsson 119919baf839SRobert Olsson goto succeeded; 120019baf839SRobert Olsson } 120119baf839SRobert Olsson /* Error if we find a perfect match which 120219baf839SRobert Olsson * uses the same scope, type, and nexthop 120319baf839SRobert Olsson * information. 120419baf839SRobert Olsson */ 1205936f6f8eSJulian Anastasov if (fa_match) 120619baf839SRobert Olsson goto out; 1207a07f5f50SStephen Hemminger 1208a2bb6d7dSRoopa Prabhu if (cfg->fc_nlflags & NLM_F_APPEND) 1209b93e1fa7SGuillaume Nault nlflags |= NLM_F_APPEND; 1210a2bb6d7dSRoopa Prabhu else 1211936f6f8eSJulian Anastasov fa = fa_first; 121219baf839SRobert Olsson } 121319baf839SRobert Olsson err = -ENOENT; 12144e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 121519baf839SRobert Olsson goto out; 121619baf839SRobert Olsson 1217b93e1fa7SGuillaume Nault nlflags |= NLM_F_CREATE; 121819baf839SRobert Olsson err = -ENOBUFS; 1219e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 122051456b29SIan Morris if (!new_fa) 122119baf839SRobert Olsson goto out; 122219baf839SRobert Olsson 122319baf839SRobert Olsson new_fa->fa_info = fi; 122419baf839SRobert Olsson new_fa->fa_tos = tos; 12254e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 122619baf839SRobert Olsson new_fa->fa_state = 0; 122779e5ad2cSAlexander Duyck new_fa->fa_slen = slen; 12280ddcf43dSAlexander Duyck new_fa->tb_id = tb->tb_id; 12292392debcSJulian Anastasov new_fa->fa_default = -1; 123019baf839SRobert Olsson 12318e05fd71SScott Feldman /* (Optionally) offload fib entry to switch hardware. */ 1232ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, 1233ebb9a03aSJiri Pirko cfg->fc_nlflags, tb->tb_id); 12348e05fd71SScott Feldman if (err) { 1235ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 12368e05fd71SScott Feldman goto out_free_new_fa; 12378e05fd71SScott Feldman } 12388e05fd71SScott Feldman 12399b6ebad5SAlexander Duyck /* Insert new entry to the list. */ 1240d5d6487cSAlexander Duyck err = fib_insert_alias(t, tp, l, new_fa, fa, key); 1241d5d6487cSAlexander Duyck if (err) 12428e05fd71SScott Feldman goto out_sw_fib_del; 124319baf839SRobert Olsson 124421d8c49eSDavid S. Miller if (!plen) 124521d8c49eSDavid S. Miller tb->tb_num_default++; 124621d8c49eSDavid S. Miller 12474ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 12480ddcf43dSAlexander Duyck rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1249a2bb6d7dSRoopa Prabhu &cfg->fc_nlinfo, nlflags); 125019baf839SRobert Olsson succeeded: 125119baf839SRobert Olsson return 0; 1252f835e471SRobert Olsson 12538e05fd71SScott Feldman out_sw_fib_del: 1254ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); 1255f835e471SRobert Olsson out_free_new_fa: 1256f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 125719baf839SRobert Olsson out: 125819baf839SRobert Olsson fib_release_info(fi); 125991b9a277SOlof Johansson err: 126019baf839SRobert Olsson return err; 126119baf839SRobert Olsson } 126219baf839SRobert Olsson 126335c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n) 12649f9e636dSAlexander Duyck { 12659f9e636dSAlexander Duyck t_key prefix = n->key; 12669f9e636dSAlexander Duyck 12679f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 12689f9e636dSAlexander Duyck } 12699f9e636dSAlexander Duyck 1270345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 127122bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1272ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 127319baf839SRobert Olsson { 127419baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 12758274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 12768274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 12778274a97aSAlexander Duyck #endif 12789f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 127935c6edacSAlexander Duyck struct key_vector *n, *pn; 128079e5ad2cSAlexander Duyck struct fib_alias *fa; 128171e8b67dSAlexander Duyck unsigned long index; 12829f9e636dSAlexander Duyck t_key cindex; 128319baf839SRobert Olsson 1284f6d3c192SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp); 1285f6d3c192SDavid Ahern 128688bae714SAlexander Duyck pn = t->kv; 128788bae714SAlexander Duyck cindex = 0; 128888bae714SAlexander Duyck 128988bae714SAlexander Duyck n = get_child_rcu(pn, cindex); 129019baf839SRobert Olsson if (!n) 1291345e9b54SAlexander Duyck return -EAGAIN; 129219baf839SRobert Olsson 129319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 12948274a97aSAlexander Duyck this_cpu_inc(stats->gets); 129519baf839SRobert Olsson #endif 129619baf839SRobert Olsson 12979f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 12989f9e636dSAlexander Duyck for (;;) { 129988bae714SAlexander Duyck index = get_cindex(key, n); 13009f9e636dSAlexander Duyck 13019f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13029f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13039f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13049f9e636dSAlexander Duyck * is the difference between the key and this value. From 13059f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 130671e8b67dSAlexander Duyck * if (index >= (1ul << bits)) 13079f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1308b3832117SAlexander Duyck * else 1309b3832117SAlexander Duyck * we know the value is cindex 131071e8b67dSAlexander Duyck * 131171e8b67dSAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 131271e8b67dSAlexander Duyck * fact that we can only allocate a node with 32 bits if a 131371e8b67dSAlexander Duyck * long is greater than 32 bits. 13149f9e636dSAlexander Duyck */ 131571e8b67dSAlexander Duyck if (index >= (1ul << n->bits)) 13169f9e636dSAlexander Duyck break; 13179f9e636dSAlexander Duyck 13189f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13199f9e636dSAlexander Duyck if (IS_LEAF(n)) 1320a07f5f50SStephen Hemminger goto found; 13219f9e636dSAlexander Duyck 13229f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13239f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13249f9e636dSAlexander Duyck */ 13255405afd1SAlexander Duyck if (n->slen > n->pos) { 13269f9e636dSAlexander Duyck pn = n; 13279f9e636dSAlexander Duyck cindex = index; 132819baf839SRobert Olsson } 1329a07f5f50SStephen Hemminger 1330754baf8dSAlexander Duyck n = get_child_rcu(n, index); 13319f9e636dSAlexander Duyck if (unlikely(!n)) 13329f9e636dSAlexander Duyck goto backtrace; 13339f9e636dSAlexander Duyck } 133419baf839SRobert Olsson 13359f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13369f9e636dSAlexander Duyck for (;;) { 13379f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 133835c6edacSAlexander Duyck struct key_vector __rcu **cptr = n->tnode; 133919baf839SRobert Olsson 13409f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13419f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13429f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13439f9e636dSAlexander Duyck */ 13445405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13459f9e636dSAlexander Duyck goto backtrace; 134619baf839SRobert Olsson 13479f9e636dSAlexander Duyck /* exit out and process leaf */ 13489f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13499f9e636dSAlexander Duyck break; 135019baf839SRobert Olsson 13519f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13529f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 13539f9e636dSAlexander Duyck * we started this traversal anyway 13549f9e636dSAlexander Duyck */ 13559f9e636dSAlexander Duyck 13569f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 13579f9e636dSAlexander Duyck backtrace: 135819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13599f9e636dSAlexander Duyck if (!n) 13608274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 136119baf839SRobert Olsson #endif 13629f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 13639f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 13649f9e636dSAlexander Duyck * up one level to see if there are any more bits to 13659f9e636dSAlexander Duyck * be stripped there. 136619baf839SRobert Olsson */ 13679f9e636dSAlexander Duyck while (!cindex) { 13689f9e636dSAlexander Duyck t_key pkey = pn->key; 136919baf839SRobert Olsson 137088bae714SAlexander Duyck /* If we don't have a parent then there is 137188bae714SAlexander Duyck * nothing for us to do as we do not have any 137288bae714SAlexander Duyck * further nodes to parse. 137388bae714SAlexander Duyck */ 137488bae714SAlexander Duyck if (IS_TRIE(pn)) 1375345e9b54SAlexander Duyck return -EAGAIN; 137619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13778274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 137819baf839SRobert Olsson #endif 13799f9e636dSAlexander Duyck /* Get Child's index */ 138088bae714SAlexander Duyck pn = node_parent_rcu(pn); 13819f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 13829f9e636dSAlexander Duyck } 13839f9e636dSAlexander Duyck 13849f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 13859f9e636dSAlexander Duyck cindex &= cindex - 1; 13869f9e636dSAlexander Duyck 13879f9e636dSAlexander Duyck /* grab pointer for next child node */ 138841b489fdSAlexander Duyck cptr = &pn->tnode[cindex]; 138919baf839SRobert Olsson } 139019baf839SRobert Olsson } 13919f9e636dSAlexander Duyck 139219baf839SRobert Olsson found: 139371e8b67dSAlexander Duyck /* this line carries forward the xor from earlier in the function */ 139471e8b67dSAlexander Duyck index = key ^ n->key; 139571e8b67dSAlexander Duyck 13969f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 139779e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 1398345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1399345e9b54SAlexander Duyck int nhsel, err; 1400345e9b54SAlexander Duyck 1401a5829f53SAlexander Duyck if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { 1402a5829f53SAlexander Duyck if (index >= (1ul << fa->fa_slen)) 14039b6ebad5SAlexander Duyck continue; 1404a5829f53SAlexander Duyck } 1405345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1406345e9b54SAlexander Duyck continue; 1407345e9b54SAlexander Duyck if (fi->fib_dead) 1408345e9b54SAlexander Duyck continue; 1409345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1410345e9b54SAlexander Duyck continue; 1411345e9b54SAlexander Duyck fib_alias_accessed(fa); 1412345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1413345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1414345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1415345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1416345e9b54SAlexander Duyck #endif 1417345e9b54SAlexander Duyck return err; 1418345e9b54SAlexander Duyck } 1419345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1420345e9b54SAlexander Duyck continue; 1421345e9b54SAlexander Duyck for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1422345e9b54SAlexander Duyck const struct fib_nh *nh = &fi->fib_nh[nhsel]; 14230eeb075fSAndy Gospodarek struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); 1424345e9b54SAlexander Duyck 1425345e9b54SAlexander Duyck if (nh->nh_flags & RTNH_F_DEAD) 1426345e9b54SAlexander Duyck continue; 14270eeb075fSAndy Gospodarek if (in_dev && 14280eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 14290eeb075fSAndy Gospodarek nh->nh_flags & RTNH_F_LINKDOWN && 14300eeb075fSAndy Gospodarek !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 14310eeb075fSAndy Gospodarek continue; 143258189ca7SDavid Ahern if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { 1433613d09b3SDavid Ahern if (flp->flowi4_oif && 1434613d09b3SDavid Ahern flp->flowi4_oif != nh->nh_oif) 1435345e9b54SAlexander Duyck continue; 1436613d09b3SDavid Ahern } 1437345e9b54SAlexander Duyck 1438345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 1439345e9b54SAlexander Duyck atomic_inc(&fi->fib_clntref); 1440345e9b54SAlexander Duyck 14419b6ebad5SAlexander Duyck res->prefixlen = KEYLENGTH - fa->fa_slen; 1442345e9b54SAlexander Duyck res->nh_sel = nhsel; 1443345e9b54SAlexander Duyck res->type = fa->fa_type; 1444345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1445345e9b54SAlexander Duyck res->fi = fi; 1446345e9b54SAlexander Duyck res->table = tb; 144779e5ad2cSAlexander Duyck res->fa_head = &n->leaf; 1448345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1449345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1450345e9b54SAlexander Duyck #endif 1451f6d3c192SDavid Ahern trace_fib_table_lookup_nh(nh); 1452f6d3c192SDavid Ahern 1453345e9b54SAlexander Duyck return err; 1454345e9b54SAlexander Duyck } 1455345e9b54SAlexander Duyck } 1456345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1457345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1458345e9b54SAlexander Duyck #endif 14599f9e636dSAlexander Duyck goto backtrace; 146019baf839SRobert Olsson } 14616fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 146219baf839SRobert Olsson 146335c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp, 146435c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *old) 1465d5d6487cSAlexander Duyck { 1466d5d6487cSAlexander Duyck /* record the location of the previous list_info entry */ 1467d5d6487cSAlexander Duyck struct hlist_node **pprev = old->fa_list.pprev; 1468d5d6487cSAlexander Duyck struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); 1469d5d6487cSAlexander Duyck 1470d5d6487cSAlexander Duyck /* remove the fib_alias from the list */ 1471d5d6487cSAlexander Duyck hlist_del_rcu(&old->fa_list); 1472d5d6487cSAlexander Duyck 1473d5d6487cSAlexander Duyck /* if we emptied the list this leaf will be freed and we can sort 1474d5d6487cSAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 1475d562f1f8SRobert Olsson */ 1476d5d6487cSAlexander Duyck if (hlist_empty(&l->leaf)) { 147788bae714SAlexander Duyck put_child_root(tp, l->key, NULL); 1478d5d6487cSAlexander Duyck node_free(l); 1479d5d6487cSAlexander Duyck trie_rebalance(t, tp); 1480d5d6487cSAlexander Duyck return; 1481d5d6487cSAlexander Duyck } 1482d5d6487cSAlexander Duyck 1483d5d6487cSAlexander Duyck /* only access fa if it is pointing at the last valid hlist_node */ 1484d5d6487cSAlexander Duyck if (*pprev) 1485d5d6487cSAlexander Duyck return; 1486d5d6487cSAlexander Duyck 1487d5d6487cSAlexander Duyck /* update the trie with the latest suffix length */ 1488d5d6487cSAlexander Duyck l->slen = fa->fa_slen; 1489d5d6487cSAlexander Duyck leaf_pull_suffix(tp, l); 1490d5d6487cSAlexander Duyck } 1491d5d6487cSAlexander Duyck 1492d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 149316c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) 149419baf839SRobert Olsson { 149519baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 149619baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 149735c6edacSAlexander Duyck struct key_vector *l, *tp; 149879e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 149979e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 1500d4a975e8SAlexander Duyck u8 tos = cfg->fc_tos; 1501d4a975e8SAlexander Duyck u32 key; 150291b9a277SOlof Johansson 150379e5ad2cSAlexander Duyck if (plen > KEYLENGTH) 150419baf839SRobert Olsson return -EINVAL; 150519baf839SRobert Olsson 15064e902c57SThomas Graf key = ntohl(cfg->fc_dst); 150719baf839SRobert Olsson 1508d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 150919baf839SRobert Olsson return -EINVAL; 151019baf839SRobert Olsson 1511d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 151219baf839SRobert Olsson if (!l) 151319baf839SRobert Olsson return -ESRCH; 151419baf839SRobert Olsson 15150b65bd97SAlexander Duyck fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); 151619baf839SRobert Olsson if (!fa) 151719baf839SRobert Olsson return -ESRCH; 151819baf839SRobert Olsson 15190c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 152019baf839SRobert Olsson 152119baf839SRobert Olsson fa_to_delete = NULL; 152256315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 152319baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 152419baf839SRobert Olsson 15250b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 15260b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 15270b65bd97SAlexander Duyck (fa->fa_tos != tos)) 152819baf839SRobert Olsson break; 152919baf839SRobert Olsson 15304e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15314e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 153237e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 153374cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 153474cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15354e902c57SThomas Graf (!cfg->fc_protocol || 15364e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15374e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 153819baf839SRobert Olsson fa_to_delete = fa; 153919baf839SRobert Olsson break; 154019baf839SRobert Olsson } 154119baf839SRobert Olsson } 154219baf839SRobert Olsson 154391b9a277SOlof Johansson if (!fa_to_delete) 154491b9a277SOlof Johansson return -ESRCH; 154519baf839SRobert Olsson 1546ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, 15478e05fd71SScott Feldman cfg->fc_type, tb->tb_id); 15488e05fd71SScott Feldman 1549d5d6487cSAlexander Duyck rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1550b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 155119baf839SRobert Olsson 155221d8c49eSDavid S. Miller if (!plen) 155321d8c49eSDavid S. Miller tb->tb_num_default--; 155421d8c49eSDavid S. Miller 1555d5d6487cSAlexander Duyck fib_remove_alias(t, tp, l, fa_to_delete); 15567289e6ddSAlexander Duyck 1557d5d6487cSAlexander Duyck if (fa_to_delete->fa_state & FA_S_ACCESSED) 15584ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 155919baf839SRobert Olsson 1560d5d6487cSAlexander Duyck fib_release_info(fa_to_delete->fa_info); 1561d5d6487cSAlexander Duyck alias_free_mem_rcu(fa_to_delete); 156219baf839SRobert Olsson return 0; 156319baf839SRobert Olsson } 156419baf839SRobert Olsson 15658be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */ 156635c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) 156719baf839SRobert Olsson { 156835c6edacSAlexander Duyck struct key_vector *pn, *n = *tn; 15698be33e95SAlexander Duyck unsigned long cindex; 157019baf839SRobert Olsson 15718be33e95SAlexander Duyck /* this loop is meant to try and find the key in the trie */ 157288bae714SAlexander Duyck do { 157388bae714SAlexander Duyck /* record parent and next child index */ 157488bae714SAlexander Duyck pn = n; 1575c2229fe1SAlexander Duyck cindex = (key > pn->key) ? get_index(key, pn) : 0; 157688bae714SAlexander Duyck 157788bae714SAlexander Duyck if (cindex >> pn->bits) 157888bae714SAlexander Duyck break; 157988bae714SAlexander Duyck 158088bae714SAlexander Duyck /* descend into the next child */ 158188bae714SAlexander Duyck n = get_child_rcu(pn, cindex++); 158288bae714SAlexander Duyck if (!n) 158388bae714SAlexander Duyck break; 15848be33e95SAlexander Duyck 15858be33e95SAlexander Duyck /* guarantee forward progress on the keys */ 15868be33e95SAlexander Duyck if (IS_LEAF(n) && (n->key >= key)) 15878be33e95SAlexander Duyck goto found; 158888bae714SAlexander Duyck } while (IS_TNODE(n)); 15898be33e95SAlexander Duyck 15908be33e95SAlexander Duyck /* this loop will search for the next leaf with a greater key */ 159188bae714SAlexander Duyck while (!IS_TRIE(pn)) { 15928be33e95SAlexander Duyck /* if we exhausted the parent node we will need to climb */ 15938be33e95SAlexander Duyck if (cindex >= (1ul << pn->bits)) { 15948be33e95SAlexander Duyck t_key pkey = pn->key; 15958be33e95SAlexander Duyck 15968be33e95SAlexander Duyck pn = node_parent_rcu(pn); 15978be33e95SAlexander Duyck cindex = get_index(pkey, pn) + 1; 15988be33e95SAlexander Duyck continue; 15998be33e95SAlexander Duyck } 16008be33e95SAlexander Duyck 16018be33e95SAlexander Duyck /* grab the next available node */ 1602754baf8dSAlexander Duyck n = get_child_rcu(pn, cindex++); 16038be33e95SAlexander Duyck if (!n) 160491b9a277SOlof Johansson continue; 160519baf839SRobert Olsson 16068be33e95SAlexander Duyck /* no need to compare keys since we bumped the index */ 16078be33e95SAlexander Duyck if (IS_LEAF(n)) 16088be33e95SAlexander Duyck goto found; 160982cfbb00SStephen Hemminger 161082cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 16118be33e95SAlexander Duyck pn = n; 16128be33e95SAlexander Duyck cindex = 0; 161319baf839SRobert Olsson } 161482cfbb00SStephen Hemminger 16158be33e95SAlexander Duyck *tn = pn; 161682cfbb00SStephen Hemminger return NULL; /* Root of trie */ 16178be33e95SAlexander Duyck found: 16188be33e95SAlexander Duyck /* if we are at the limit for keys just return NULL for the tnode */ 161988bae714SAlexander Duyck *tn = pn; 1620adaf9816SAlexander Duyck return n; 162182cfbb00SStephen Hemminger } 162282cfbb00SStephen Hemminger 16230ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb) 16240ddcf43dSAlexander Duyck { 16250ddcf43dSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 16260ddcf43dSAlexander Duyck struct key_vector *pn = t->kv; 16270ddcf43dSAlexander Duyck unsigned long cindex = 1; 16280ddcf43dSAlexander Duyck struct hlist_node *tmp; 16290ddcf43dSAlexander Duyck struct fib_alias *fa; 16300ddcf43dSAlexander Duyck 16310ddcf43dSAlexander Duyck /* walk trie in reverse order and free everything */ 16320ddcf43dSAlexander Duyck for (;;) { 16330ddcf43dSAlexander Duyck struct key_vector *n; 16340ddcf43dSAlexander Duyck 16350ddcf43dSAlexander Duyck if (!(cindex--)) { 16360ddcf43dSAlexander Duyck t_key pkey = pn->key; 16370ddcf43dSAlexander Duyck 16380ddcf43dSAlexander Duyck if (IS_TRIE(pn)) 16390ddcf43dSAlexander Duyck break; 16400ddcf43dSAlexander Duyck 16410ddcf43dSAlexander Duyck n = pn; 16420ddcf43dSAlexander Duyck pn = node_parent(pn); 16430ddcf43dSAlexander Duyck 16440ddcf43dSAlexander Duyck /* drop emptied tnode */ 16450ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16460ddcf43dSAlexander Duyck node_free(n); 16470ddcf43dSAlexander Duyck 16480ddcf43dSAlexander Duyck cindex = get_index(pkey, pn); 16490ddcf43dSAlexander Duyck 16500ddcf43dSAlexander Duyck continue; 16510ddcf43dSAlexander Duyck } 16520ddcf43dSAlexander Duyck 16530ddcf43dSAlexander Duyck /* grab the next available node */ 16540ddcf43dSAlexander Duyck n = get_child(pn, cindex); 16550ddcf43dSAlexander Duyck if (!n) 16560ddcf43dSAlexander Duyck continue; 16570ddcf43dSAlexander Duyck 16580ddcf43dSAlexander Duyck if (IS_TNODE(n)) { 16590ddcf43dSAlexander Duyck /* record pn and cindex for leaf walking */ 16600ddcf43dSAlexander Duyck pn = n; 16610ddcf43dSAlexander Duyck cindex = 1ul << n->bits; 16620ddcf43dSAlexander Duyck 16630ddcf43dSAlexander Duyck continue; 16640ddcf43dSAlexander Duyck } 16650ddcf43dSAlexander Duyck 16660ddcf43dSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 16670ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 16680ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 16690ddcf43dSAlexander Duyck } 16700ddcf43dSAlexander Duyck 16710ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16720ddcf43dSAlexander Duyck node_free(n); 16730ddcf43dSAlexander Duyck } 16740ddcf43dSAlexander Duyck 16750ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 16760ddcf43dSAlexander Duyck free_percpu(t->stats); 16770ddcf43dSAlexander Duyck #endif 16780ddcf43dSAlexander Duyck kfree(tb); 16790ddcf43dSAlexander Duyck } 16800ddcf43dSAlexander Duyck 16810ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) 16820ddcf43dSAlexander Duyck { 16830ddcf43dSAlexander Duyck struct trie *ot = (struct trie *)oldtb->tb_data; 16840ddcf43dSAlexander Duyck struct key_vector *l, *tp = ot->kv; 16850ddcf43dSAlexander Duyck struct fib_table *local_tb; 16860ddcf43dSAlexander Duyck struct fib_alias *fa; 16870ddcf43dSAlexander Duyck struct trie *lt; 16880ddcf43dSAlexander Duyck t_key key = 0; 16890ddcf43dSAlexander Duyck 16900ddcf43dSAlexander Duyck if (oldtb->tb_data == oldtb->__data) 16910ddcf43dSAlexander Duyck return oldtb; 16920ddcf43dSAlexander Duyck 16930ddcf43dSAlexander Duyck local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); 16940ddcf43dSAlexander Duyck if (!local_tb) 16950ddcf43dSAlexander Duyck return NULL; 16960ddcf43dSAlexander Duyck 16970ddcf43dSAlexander Duyck lt = (struct trie *)local_tb->tb_data; 16980ddcf43dSAlexander Duyck 16990ddcf43dSAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 17000ddcf43dSAlexander Duyck struct key_vector *local_l = NULL, *local_tp; 17010ddcf43dSAlexander Duyck 17020ddcf43dSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 17030ddcf43dSAlexander Duyck struct fib_alias *new_fa; 17040ddcf43dSAlexander Duyck 17050ddcf43dSAlexander Duyck if (local_tb->tb_id != fa->tb_id) 17060ddcf43dSAlexander Duyck continue; 17070ddcf43dSAlexander Duyck 17080ddcf43dSAlexander Duyck /* clone fa for new local table */ 17090ddcf43dSAlexander Duyck new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 17100ddcf43dSAlexander Duyck if (!new_fa) 17110ddcf43dSAlexander Duyck goto out; 17120ddcf43dSAlexander Duyck 17130ddcf43dSAlexander Duyck memcpy(new_fa, fa, sizeof(*fa)); 17140ddcf43dSAlexander Duyck 17150ddcf43dSAlexander Duyck /* insert clone into table */ 17160ddcf43dSAlexander Duyck if (!local_l) 17170ddcf43dSAlexander Duyck local_l = fib_find_node(lt, &local_tp, l->key); 17180ddcf43dSAlexander Duyck 17190ddcf43dSAlexander Duyck if (fib_insert_alias(lt, local_tp, local_l, new_fa, 17200ddcf43dSAlexander Duyck NULL, l->key)) 17210ddcf43dSAlexander Duyck goto out; 17220ddcf43dSAlexander Duyck } 17230ddcf43dSAlexander Duyck 17240ddcf43dSAlexander Duyck /* stop loop if key wrapped back to 0 */ 17250ddcf43dSAlexander Duyck key = l->key + 1; 17260ddcf43dSAlexander Duyck if (key < l->key) 17270ddcf43dSAlexander Duyck break; 17280ddcf43dSAlexander Duyck } 17290ddcf43dSAlexander Duyck 17300ddcf43dSAlexander Duyck return local_tb; 17310ddcf43dSAlexander Duyck out: 17320ddcf43dSAlexander Duyck fib_trie_free(local_tb); 17330ddcf43dSAlexander Duyck 17340ddcf43dSAlexander Duyck return NULL; 17350ddcf43dSAlexander Duyck } 17360ddcf43dSAlexander Duyck 1737104616e7SScott Feldman /* Caller must hold RTNL */ 1738104616e7SScott Feldman void fib_table_flush_external(struct fib_table *tb) 1739104616e7SScott Feldman { 1740104616e7SScott Feldman struct trie *t = (struct trie *)tb->tb_data; 174188bae714SAlexander Duyck struct key_vector *pn = t->kv; 174288bae714SAlexander Duyck unsigned long cindex = 1; 174388bae714SAlexander Duyck struct hlist_node *tmp; 1744104616e7SScott Feldman struct fib_alias *fa; 1745104616e7SScott Feldman 1746104616e7SScott Feldman /* walk trie in reverse order */ 174788bae714SAlexander Duyck for (;;) { 17480ddcf43dSAlexander Duyck unsigned char slen = 0; 174988bae714SAlexander Duyck struct key_vector *n; 175088bae714SAlexander Duyck 175188bae714SAlexander Duyck if (!(cindex--)) { 1752104616e7SScott Feldman t_key pkey = pn->key; 1753104616e7SScott Feldman 175488bae714SAlexander Duyck /* cannot resize the trie vector */ 175588bae714SAlexander Duyck if (IS_TRIE(pn)) 175688bae714SAlexander Duyck break; 1757104616e7SScott Feldman 17580ddcf43dSAlexander Duyck /* resize completed node */ 17590ddcf43dSAlexander Duyck pn = resize(t, pn); 1760104616e7SScott Feldman cindex = get_index(pkey, pn); 176188bae714SAlexander Duyck 176288bae714SAlexander Duyck continue; 1763104616e7SScott Feldman } 1764104616e7SScott Feldman 1765104616e7SScott Feldman /* grab the next available node */ 1766754baf8dSAlexander Duyck n = get_child(pn, cindex); 176788bae714SAlexander Duyck if (!n) 176888bae714SAlexander Duyck continue; 176988bae714SAlexander Duyck 177088bae714SAlexander Duyck if (IS_TNODE(n)) { 177188bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 177288bae714SAlexander Duyck pn = n; 177388bae714SAlexander Duyck cindex = 1ul << n->bits; 177488bae714SAlexander Duyck 177588bae714SAlexander Duyck continue; 1776104616e7SScott Feldman } 1777104616e7SScott Feldman 177888bae714SAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 1779104616e7SScott Feldman struct fib_info *fi = fa->fa_info; 1780104616e7SScott Feldman 17810ddcf43dSAlexander Duyck /* if alias was cloned to local then we just 17820ddcf43dSAlexander Duyck * need to remove the local copy from main 17830ddcf43dSAlexander Duyck */ 17840ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 17850ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 17860ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 17870ddcf43dSAlexander Duyck continue; 17880ddcf43dSAlexander Duyck } 17890ddcf43dSAlexander Duyck 17900ddcf43dSAlexander Duyck /* record local slen */ 17910ddcf43dSAlexander Duyck slen = fa->fa_slen; 17920ddcf43dSAlexander Duyck 1793eea39946SRoopa Prabhu if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) 179472be7260SAlexander Duyck continue; 179572be7260SAlexander Duyck 1796ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1797ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1798ebb9a03aSJiri Pirko tb->tb_id); 1799104616e7SScott Feldman } 18000ddcf43dSAlexander Duyck 18010ddcf43dSAlexander Duyck /* update leaf slen */ 18020ddcf43dSAlexander Duyck n->slen = slen; 18030ddcf43dSAlexander Duyck 18040ddcf43dSAlexander Duyck if (hlist_empty(&n->leaf)) { 18050ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 18060ddcf43dSAlexander Duyck node_free(n); 18070ddcf43dSAlexander Duyck } 180888bae714SAlexander Duyck } 1809104616e7SScott Feldman } 1810104616e7SScott Feldman 18118be33e95SAlexander Duyck /* Caller must hold RTNL. */ 181216c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb) 181319baf839SRobert Olsson { 181419baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 181588bae714SAlexander Duyck struct key_vector *pn = t->kv; 181688bae714SAlexander Duyck unsigned long cindex = 1; 18177289e6ddSAlexander Duyck struct hlist_node *tmp; 18187289e6ddSAlexander Duyck struct fib_alias *fa; 181982cfbb00SStephen Hemminger int found = 0; 182019baf839SRobert Olsson 18217289e6ddSAlexander Duyck /* walk trie in reverse order */ 182288bae714SAlexander Duyck for (;;) { 182388bae714SAlexander Duyck unsigned char slen = 0; 182488bae714SAlexander Duyck struct key_vector *n; 182588bae714SAlexander Duyck 182688bae714SAlexander Duyck if (!(cindex--)) { 18277289e6ddSAlexander Duyck t_key pkey = pn->key; 18287289e6ddSAlexander Duyck 182988bae714SAlexander Duyck /* cannot resize the trie vector */ 183088bae714SAlexander Duyck if (IS_TRIE(pn)) 183188bae714SAlexander Duyck break; 18327289e6ddSAlexander Duyck 18337289e6ddSAlexander Duyck /* resize completed node */ 183488bae714SAlexander Duyck pn = resize(t, pn); 18357289e6ddSAlexander Duyck cindex = get_index(pkey, pn); 183688bae714SAlexander Duyck 183788bae714SAlexander Duyck continue; 183864c62723SAlexander Duyck } 183964c62723SAlexander Duyck 18407289e6ddSAlexander Duyck /* grab the next available node */ 1841754baf8dSAlexander Duyck n = get_child(pn, cindex); 184288bae714SAlexander Duyck if (!n) 184388bae714SAlexander Duyck continue; 184419baf839SRobert Olsson 184588bae714SAlexander Duyck if (IS_TNODE(n)) { 184688bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 184788bae714SAlexander Duyck pn = n; 184888bae714SAlexander Duyck cindex = 1ul << n->bits; 184988bae714SAlexander Duyck 185088bae714SAlexander Duyck continue; 185188bae714SAlexander Duyck } 18527289e6ddSAlexander Duyck 18537289e6ddSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 18547289e6ddSAlexander Duyck struct fib_info *fi = fa->fa_info; 18557289e6ddSAlexander Duyck 185688bae714SAlexander Duyck if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { 185788bae714SAlexander Duyck slen = fa->fa_slen; 185888bae714SAlexander Duyck continue; 185988bae714SAlexander Duyck } 186088bae714SAlexander Duyck 1861ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1862ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1863ebb9a03aSJiri Pirko tb->tb_id); 18647289e6ddSAlexander Duyck hlist_del_rcu(&fa->fa_list); 18657289e6ddSAlexander Duyck fib_release_info(fa->fa_info); 18667289e6ddSAlexander Duyck alias_free_mem_rcu(fa); 18677289e6ddSAlexander Duyck found++; 18687289e6ddSAlexander Duyck } 18697289e6ddSAlexander Duyck 18707289e6ddSAlexander Duyck /* update leaf slen */ 18717289e6ddSAlexander Duyck n->slen = slen; 18727289e6ddSAlexander Duyck 18737289e6ddSAlexander Duyck if (hlist_empty(&n->leaf)) { 187488bae714SAlexander Duyck put_child_root(pn, n->key, NULL); 18757289e6ddSAlexander Duyck node_free(n); 18767289e6ddSAlexander Duyck } 187788bae714SAlexander Duyck } 18787289e6ddSAlexander Duyck 18790c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 188019baf839SRobert Olsson return found; 188119baf839SRobert Olsson } 188219baf839SRobert Olsson 1883a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head) 18844aa2c466SPavel Emelyanov { 1885a7e53531SAlexander Duyck struct fib_table *tb = container_of(head, struct fib_table, rcu); 18868274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 18878274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 18888274a97aSAlexander Duyck 18890ddcf43dSAlexander Duyck if (tb->tb_data == tb->__data) 18908274a97aSAlexander Duyck free_percpu(t->stats); 18918274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 18924aa2c466SPavel Emelyanov kfree(tb); 18934aa2c466SPavel Emelyanov } 18944aa2c466SPavel Emelyanov 1895a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb) 1896a7e53531SAlexander Duyck { 1897a7e53531SAlexander Duyck call_rcu(&tb->rcu, __trie_free_rcu); 1898a7e53531SAlexander Duyck } 1899a7e53531SAlexander Duyck 190035c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 190119baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 190219baf839SRobert Olsson { 190379e5ad2cSAlexander Duyck __be32 xkey = htonl(l->key); 190419baf839SRobert Olsson struct fib_alias *fa; 190579e5ad2cSAlexander Duyck int i, s_i; 190619baf839SRobert Olsson 190779e5ad2cSAlexander Duyck s_i = cb->args[4]; 190819baf839SRobert Olsson i = 0; 190919baf839SRobert Olsson 19102373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 191179e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 191219baf839SRobert Olsson if (i < s_i) { 191319baf839SRobert Olsson i++; 191419baf839SRobert Olsson continue; 191519baf839SRobert Olsson } 191619baf839SRobert Olsson 19170ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 19180ddcf43dSAlexander Duyck i++; 19190ddcf43dSAlexander Duyck continue; 19200ddcf43dSAlexander Duyck } 19210ddcf43dSAlexander Duyck 192215e47304SEric W. Biederman if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 192319baf839SRobert Olsson cb->nlh->nlmsg_seq, 192419baf839SRobert Olsson RTM_NEWROUTE, 192519baf839SRobert Olsson tb->tb_id, 192619baf839SRobert Olsson fa->fa_type, 1927be403ea1SThomas Graf xkey, 19289b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 192919baf839SRobert Olsson fa->fa_tos, 193064347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 193171d67e66SStephen Hemminger cb->args[4] = i; 193219baf839SRobert Olsson return -1; 193319baf839SRobert Olsson } 1934a88ee229SStephen Hemminger i++; 193519baf839SRobert Olsson } 1936a88ee229SStephen Hemminger 193771d67e66SStephen Hemminger cb->args[4] = i; 193819baf839SRobert Olsson return skb->len; 193919baf839SRobert Olsson } 194019baf839SRobert Olsson 1941a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */ 194216c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1943a07f5f50SStephen Hemminger struct netlink_callback *cb) 194419baf839SRobert Olsson { 194519baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 194688bae714SAlexander Duyck struct key_vector *l, *tp = t->kv; 1947d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1948d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1949d5ce8a0eSStephen Hemminger */ 19508be33e95SAlexander Duyck int count = cb->args[2]; 19518be33e95SAlexander Duyck t_key key = cb->args[3]; 1952a88ee229SStephen Hemminger 19538be33e95SAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 1954a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 19558be33e95SAlexander Duyck cb->args[3] = key; 19568be33e95SAlexander Duyck cb->args[2] = count; 195719baf839SRobert Olsson return -1; 195819baf839SRobert Olsson } 1959d5ce8a0eSStephen Hemminger 196071d67e66SStephen Hemminger ++count; 19618be33e95SAlexander Duyck key = l->key + 1; 19628be33e95SAlexander Duyck 196371d67e66SStephen Hemminger memset(&cb->args[4], 0, 196471d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 19658be33e95SAlexander Duyck 19668be33e95SAlexander Duyck /* stop loop if key wrapped back to 0 */ 19678be33e95SAlexander Duyck if (key < l->key) 19688be33e95SAlexander Duyck break; 1969a88ee229SStephen Hemminger } 19708be33e95SAlexander Duyck 19718be33e95SAlexander Duyck cb->args[3] = key; 19728be33e95SAlexander Duyck cb->args[2] = count; 19738be33e95SAlexander Duyck 1974a88ee229SStephen Hemminger return skb->len; 1975a88ee229SStephen Hemminger } 197619baf839SRobert Olsson 19775348ba85SDavid S. Miller void __init fib_trie_init(void) 19787f9b8052SStephen Hemminger { 1979a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1980a07f5f50SStephen Hemminger sizeof(struct fib_alias), 1981bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 1982bc3c8c1eSStephen Hemminger 1983bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 198441b489fdSAlexander Duyck LEAF_SIZE, 1985bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 19867f9b8052SStephen Hemminger } 198719baf839SRobert Olsson 19880ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) 198919baf839SRobert Olsson { 199019baf839SRobert Olsson struct fib_table *tb; 199119baf839SRobert Olsson struct trie *t; 19920ddcf43dSAlexander Duyck size_t sz = sizeof(*tb); 199319baf839SRobert Olsson 19940ddcf43dSAlexander Duyck if (!alias) 19950ddcf43dSAlexander Duyck sz += sizeof(struct trie); 19960ddcf43dSAlexander Duyck 19970ddcf43dSAlexander Duyck tb = kzalloc(sz, GFP_KERNEL); 199851456b29SIan Morris if (!tb) 199919baf839SRobert Olsson return NULL; 200019baf839SRobert Olsson 200119baf839SRobert Olsson tb->tb_id = id; 200221d8c49eSDavid S. Miller tb->tb_num_default = 0; 20030ddcf43dSAlexander Duyck tb->tb_data = (alias ? alias->__data : tb->__data); 20040ddcf43dSAlexander Duyck 20050ddcf43dSAlexander Duyck if (alias) 20060ddcf43dSAlexander Duyck return tb; 200719baf839SRobert Olsson 200819baf839SRobert Olsson t = (struct trie *) tb->tb_data; 200988bae714SAlexander Duyck t->kv[0].pos = KEYLENGTH; 201088bae714SAlexander Duyck t->kv[0].slen = KEYLENGTH; 20118274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 20128274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 20138274a97aSAlexander Duyck if (!t->stats) { 20148274a97aSAlexander Duyck kfree(tb); 20158274a97aSAlexander Duyck tb = NULL; 20168274a97aSAlexander Duyck } 20178274a97aSAlexander Duyck #endif 201819baf839SRobert Olsson 201919baf839SRobert Olsson return tb; 202019baf839SRobert Olsson } 202119baf839SRobert Olsson 2022cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2023cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2024cb7b593cSStephen Hemminger struct fib_trie_iter { 20251c340b2fSDenis V. Lunev struct seq_net_private p; 20263d3b2d25SStephen Hemminger struct fib_table *tb; 202735c6edacSAlexander Duyck struct key_vector *tnode; 2028a034ee3cSEric Dumazet unsigned int index; 2029a034ee3cSEric Dumazet unsigned int depth; 2030cb7b593cSStephen Hemminger }; 203119baf839SRobert Olsson 203235c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) 203319baf839SRobert Olsson { 203498293e8dSAlexander Duyck unsigned long cindex = iter->index; 203588bae714SAlexander Duyck struct key_vector *pn = iter->tnode; 203688bae714SAlexander Duyck t_key pkey; 20376640e697SEric W. Biederman 2038cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2039cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 204019baf839SRobert Olsson 204188bae714SAlexander Duyck while (!IS_TRIE(pn)) { 204288bae714SAlexander Duyck while (cindex < child_length(pn)) { 204388bae714SAlexander Duyck struct key_vector *n = get_child_rcu(pn, cindex++); 204488bae714SAlexander Duyck 204588bae714SAlexander Duyck if (!n) 204688bae714SAlexander Duyck continue; 204788bae714SAlexander Duyck 204819baf839SRobert Olsson if (IS_LEAF(n)) { 204988bae714SAlexander Duyck iter->tnode = pn; 205088bae714SAlexander Duyck iter->index = cindex; 205191b9a277SOlof Johansson } else { 2052cb7b593cSStephen Hemminger /* push down one level */ 2053adaf9816SAlexander Duyck iter->tnode = n; 2054cb7b593cSStephen Hemminger iter->index = 0; 2055cb7b593cSStephen Hemminger ++iter->depth; 205619baf839SRobert Olsson } 205788bae714SAlexander Duyck 2058cb7b593cSStephen Hemminger return n; 205919baf839SRobert Olsson } 206019baf839SRobert Olsson 2061cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 206288bae714SAlexander Duyck pkey = pn->key; 206388bae714SAlexander Duyck pn = node_parent_rcu(pn); 206488bae714SAlexander Duyck cindex = get_index(pkey, pn) + 1; 2065cb7b593cSStephen Hemminger --iter->depth; 2066cb7b593cSStephen Hemminger } 2067cb7b593cSStephen Hemminger 206888bae714SAlexander Duyck /* record root node so further searches know we are done */ 206988bae714SAlexander Duyck iter->tnode = pn; 207088bae714SAlexander Duyck iter->index = 0; 207188bae714SAlexander Duyck 2072cb7b593cSStephen Hemminger return NULL; 2073cb7b593cSStephen Hemminger } 2074cb7b593cSStephen Hemminger 207535c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2076cb7b593cSStephen Hemminger struct trie *t) 2077cb7b593cSStephen Hemminger { 2078f38b24c9SFiro Yang struct key_vector *n, *pn; 20795ddf0eb2SRobert Olsson 20805ddf0eb2SRobert Olsson if (!t) 20815ddf0eb2SRobert Olsson return NULL; 20825ddf0eb2SRobert Olsson 2083f38b24c9SFiro Yang pn = t->kv; 208488bae714SAlexander Duyck n = rcu_dereference(pn->tnode[0]); 20853d3b2d25SStephen Hemminger if (!n) 20865ddf0eb2SRobert Olsson return NULL; 2087cb7b593cSStephen Hemminger 20886640e697SEric W. Biederman if (IS_TNODE(n)) { 2089adaf9816SAlexander Duyck iter->tnode = n; 2090cb7b593cSStephen Hemminger iter->index = 0; 20911d25cd6cSRobert Olsson iter->depth = 1; 20926640e697SEric W. Biederman } else { 209388bae714SAlexander Duyck iter->tnode = pn; 20946640e697SEric W. Biederman iter->index = 0; 20956640e697SEric W. Biederman iter->depth = 0; 20966640e697SEric W. Biederman } 20973d3b2d25SStephen Hemminger 2098cb7b593cSStephen Hemminger return n; 2099cb7b593cSStephen Hemminger } 2100cb7b593cSStephen Hemminger 2101cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 210219baf839SRobert Olsson { 210335c6edacSAlexander Duyck struct key_vector *n; 2104cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2105cb7b593cSStephen Hemminger 2106cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 210719baf839SRobert Olsson 21082373ce1cSRobert Olsson rcu_read_lock(); 21093d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2110cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 211179e5ad2cSAlexander Duyck struct fib_alias *fa; 211293672292SStephen Hemminger 211319baf839SRobert Olsson s->leaves++; 2114cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2115cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2116cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 211793672292SStephen Hemminger 211879e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) 211993672292SStephen Hemminger ++s->prefixes; 212091b9a277SOlof Johansson } else { 212119baf839SRobert Olsson s->tnodes++; 2122adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 2123adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 21246e22d174SAlexander Duyck s->nullpointers += tn_info(n)->empty_children; 212519baf839SRobert Olsson } 212698293e8dSAlexander Duyck } 21272373ce1cSRobert Olsson rcu_read_unlock(); 212819baf839SRobert Olsson } 212919baf839SRobert Olsson 213019baf839SRobert Olsson /* 213119baf839SRobert Olsson * This outputs /proc/net/fib_triestats 213219baf839SRobert Olsson */ 2133cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 213419baf839SRobert Olsson { 2135a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 213619baf839SRobert Olsson 213719baf839SRobert Olsson if (stat->leaves) 213819baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 213919baf839SRobert Olsson else 214019baf839SRobert Olsson avdepth = 0; 214119baf839SRobert Olsson 2142a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2143a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2144cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2145cb7b593cSStephen Hemminger 2146cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 214741b489fdSAlexander Duyck bytes = LEAF_SIZE * stat->leaves; 214893672292SStephen Hemminger 214993672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 215079e5ad2cSAlexander Duyck bytes += sizeof(struct fib_alias) * stat->prefixes; 215193672292SStephen Hemminger 2152187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 215341b489fdSAlexander Duyck bytes += TNODE_SIZE(0) * stat->tnodes; 215419baf839SRobert Olsson 215506ef921dSRobert Olsson max = MAX_STAT_DEPTH; 215606ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 215719baf839SRobert Olsson max--; 215819baf839SRobert Olsson 2159cb7b593cSStephen Hemminger pointers = 0; 2160f585a991SJerry Snitselaar for (i = 1; i < max; i++) 216119baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2162187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 216319baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 216419baf839SRobert Olsson } 2165cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2166187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2167cb7b593cSStephen Hemminger 216835c6edacSAlexander Duyck bytes += sizeof(struct key_vector *) * pointers; 2169187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2170187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 217166a2f7fdSStephen Hemminger } 217219baf839SRobert Olsson 217319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 217466a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 21758274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 217666a2f7fdSStephen Hemminger { 21778274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 21788274a97aSAlexander Duyck int cpu; 21798274a97aSAlexander Duyck 21808274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 21818274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 21828274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 21838274a97aSAlexander Duyck 21848274a97aSAlexander Duyck s.gets += pcpu->gets; 21858274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 21868274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 21878274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 21888274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 21898274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 21908274a97aSAlexander Duyck } 21918274a97aSAlexander Duyck 219266a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 21938274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 21948274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2195a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 21968274a97aSAlexander Duyck s.semantic_match_passed); 21978274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 21988274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 21998274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 220019baf839SRobert Olsson } 220166a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 220266a2f7fdSStephen Hemminger 22033d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2204d717a9a6SStephen Hemminger { 22053d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 22063d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 22073d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 22083d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 22093d3b2d25SStephen Hemminger else 22103d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2211d717a9a6SStephen Hemminger } 221219baf839SRobert Olsson 22133d3b2d25SStephen Hemminger 221419baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 221519baf839SRobert Olsson { 22161c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 22173d3b2d25SStephen Hemminger unsigned int h; 2218877a9bffSEric W. Biederman 2219d717a9a6SStephen Hemminger seq_printf(seq, 2220a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2221a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 222241b489fdSAlexander Duyck LEAF_SIZE, TNODE_SIZE(0)); 222319baf839SRobert Olsson 22243d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22253d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22263d3b2d25SStephen Hemminger struct fib_table *tb; 2227cb7b593cSStephen Hemminger 2228b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 22293d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 22303d3b2d25SStephen Hemminger struct trie_stat stat; 22313d3b2d25SStephen Hemminger 22323d3b2d25SStephen Hemminger if (!t) 22333d3b2d25SStephen Hemminger continue; 22343d3b2d25SStephen Hemminger 22353d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22363d3b2d25SStephen Hemminger 22373d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22383d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22393d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22408274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 22413d3b2d25SStephen Hemminger #endif 22423d3b2d25SStephen Hemminger } 22433d3b2d25SStephen Hemminger } 2244cb7b593cSStephen Hemminger 224519baf839SRobert Olsson return 0; 224619baf839SRobert Olsson } 224719baf839SRobert Olsson 224819baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 224919baf839SRobert Olsson { 2250de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 22511c340b2fSDenis V. Lunev } 22521c340b2fSDenis V. Lunev 22539a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 225419baf839SRobert Olsson .owner = THIS_MODULE, 225519baf839SRobert Olsson .open = fib_triestat_seq_open, 225619baf839SRobert Olsson .read = seq_read, 225719baf839SRobert Olsson .llseek = seq_lseek, 2258b6fcbdb4SPavel Emelyanov .release = single_release_net, 225919baf839SRobert Olsson }; 226019baf839SRobert Olsson 226135c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 226219baf839SRobert Olsson { 22631218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 22641218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2265cb7b593cSStephen Hemminger loff_t idx = 0; 22663d3b2d25SStephen Hemminger unsigned int h; 22673d3b2d25SStephen Hemminger 22683d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22693d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22703d3b2d25SStephen Hemminger struct fib_table *tb; 22713d3b2d25SStephen Hemminger 2272b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 227335c6edacSAlexander Duyck struct key_vector *n; 2274cb7b593cSStephen Hemminger 22753d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 22763d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 22773d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 22783d3b2d25SStephen Hemminger if (pos == idx++) { 22793d3b2d25SStephen Hemminger iter->tb = tb; 2280cb7b593cSStephen Hemminger return n; 228119baf839SRobert Olsson } 22823d3b2d25SStephen Hemminger } 22833d3b2d25SStephen Hemminger } 228419baf839SRobert Olsson 228519baf839SRobert Olsson return NULL; 228619baf839SRobert Olsson } 228719baf839SRobert Olsson 228819baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2289c95aaf9aSStephen Hemminger __acquires(RCU) 229019baf839SRobert Olsson { 2291cb7b593cSStephen Hemminger rcu_read_lock(); 22921218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 229319baf839SRobert Olsson } 229419baf839SRobert Olsson 229519baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 229619baf839SRobert Olsson { 2297cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 22981218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 22993d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 23003d3b2d25SStephen Hemminger struct hlist_node *tb_node; 23013d3b2d25SStephen Hemminger unsigned int h; 230235c6edacSAlexander Duyck struct key_vector *n; 2303cb7b593cSStephen Hemminger 230419baf839SRobert Olsson ++*pos; 23053d3b2d25SStephen Hemminger /* next node in same table */ 23063d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 23073d3b2d25SStephen Hemminger if (n) 23083d3b2d25SStephen Hemminger return n; 230991b9a277SOlof Johansson 23103d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 23113d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 23120a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 23133d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 23143d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23153d3b2d25SStephen Hemminger if (n) 23163d3b2d25SStephen Hemminger goto found; 23173d3b2d25SStephen Hemminger } 2318cb7b593cSStephen Hemminger 23193d3b2d25SStephen Hemminger /* new hash chain */ 23203d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 23213d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2322b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 23233d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23243d3b2d25SStephen Hemminger if (n) 23253d3b2d25SStephen Hemminger goto found; 23263d3b2d25SStephen Hemminger } 23273d3b2d25SStephen Hemminger } 2328cb7b593cSStephen Hemminger return NULL; 23293d3b2d25SStephen Hemminger 23303d3b2d25SStephen Hemminger found: 23313d3b2d25SStephen Hemminger iter->tb = tb; 23323d3b2d25SStephen Hemminger return n; 233319baf839SRobert Olsson } 233419baf839SRobert Olsson 233519baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2336c95aaf9aSStephen Hemminger __releases(RCU) 233719baf839SRobert Olsson { 2338cb7b593cSStephen Hemminger rcu_read_unlock(); 233919baf839SRobert Olsson } 234019baf839SRobert Olsson 2341cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2342cb7b593cSStephen Hemminger { 2343a034ee3cSEric Dumazet while (n-- > 0) 2344a034ee3cSEric Dumazet seq_puts(seq, " "); 2345cb7b593cSStephen Hemminger } 234619baf839SRobert Olsson 234728d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2348cb7b593cSStephen Hemminger { 2349cb7b593cSStephen Hemminger switch (s) { 2350cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2351cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2352cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2353cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2354cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2355cb7b593cSStephen Hemminger default: 235628d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2357cb7b593cSStephen Hemminger return buf; 2358cb7b593cSStephen Hemminger } 2359cb7b593cSStephen Hemminger } 2360cb7b593cSStephen Hemminger 236136cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2362cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2363cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2364cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2365cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2366cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2367cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2368cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2369cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2370cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2371cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2372cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2373cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2374cb7b593cSStephen Hemminger }; 2375cb7b593cSStephen Hemminger 2376a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2377cb7b593cSStephen Hemminger { 2378cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2379cb7b593cSStephen Hemminger return rtn_type_names[t]; 238028d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2381cb7b593cSStephen Hemminger return buf; 2382cb7b593cSStephen Hemminger } 2383cb7b593cSStephen Hemminger 2384cb7b593cSStephen Hemminger /* Pretty print the trie */ 238519baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 238619baf839SRobert Olsson { 2387cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 238835c6edacSAlexander Duyck struct key_vector *n = v; 238919baf839SRobert Olsson 239088bae714SAlexander Duyck if (IS_TRIE(node_parent_rcu(n))) 23913d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2392095b8501SRobert Olsson 2393095b8501SRobert Olsson if (IS_TNODE(n)) { 2394adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2395095b8501SRobert Olsson 23961d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2397e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2398e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 23996e22d174SAlexander Duyck tn_info(n)->full_children, 24006e22d174SAlexander Duyck tn_info(n)->empty_children); 2401cb7b593cSStephen Hemminger } else { 2402adaf9816SAlexander Duyck __be32 val = htonl(n->key); 240379e5ad2cSAlexander Duyck struct fib_alias *fa; 2404cb7b593cSStephen Hemminger 2405cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2406673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 240728d36e37SEric Dumazet 240879e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 240928d36e37SEric Dumazet char buf1[32], buf2[32]; 241028d36e37SEric Dumazet 2411cb7b593cSStephen Hemminger seq_indent(seq, iter->depth + 1); 24125786ec60SAlexander Duyck seq_printf(seq, " /%zu %s %s", 24139b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 241428d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 241537e826c5SDavid S. Miller fa->fa_info->fib_scope), 241628d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 241728d36e37SEric Dumazet fa->fa_type)); 2418cb7b593cSStephen Hemminger if (fa->fa_tos) 2419b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2420cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2421cb7b593cSStephen Hemminger } 2422cb7b593cSStephen Hemminger } 242319baf839SRobert Olsson 242419baf839SRobert Olsson return 0; 242519baf839SRobert Olsson } 242619baf839SRobert Olsson 2427f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 242819baf839SRobert Olsson .start = fib_trie_seq_start, 242919baf839SRobert Olsson .next = fib_trie_seq_next, 243019baf839SRobert Olsson .stop = fib_trie_seq_stop, 243119baf839SRobert Olsson .show = fib_trie_seq_show, 243219baf839SRobert Olsson }; 243319baf839SRobert Olsson 243419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 243519baf839SRobert Olsson { 24361c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2437cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 243819baf839SRobert Olsson } 243919baf839SRobert Olsson 24409a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 244119baf839SRobert Olsson .owner = THIS_MODULE, 244219baf839SRobert Olsson .open = fib_trie_seq_open, 244319baf839SRobert Olsson .read = seq_read, 244419baf839SRobert Olsson .llseek = seq_lseek, 24451c340b2fSDenis V. Lunev .release = seq_release_net, 244619baf839SRobert Olsson }; 244719baf839SRobert Olsson 24488315f5d8SStephen Hemminger struct fib_route_iter { 24498315f5d8SStephen Hemminger struct seq_net_private p; 24508be33e95SAlexander Duyck struct fib_table *main_tb; 245135c6edacSAlexander Duyck struct key_vector *tnode; 24528315f5d8SStephen Hemminger loff_t pos; 24538315f5d8SStephen Hemminger t_key key; 24548315f5d8SStephen Hemminger }; 24558315f5d8SStephen Hemminger 245635c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, 245735c6edacSAlexander Duyck loff_t pos) 24588315f5d8SStephen Hemminger { 245935c6edacSAlexander Duyck struct key_vector *l, **tp = &iter->tnode; 24608be33e95SAlexander Duyck t_key key; 24618315f5d8SStephen Hemminger 24628be33e95SAlexander Duyck /* use cache location of next-to-find key */ 24638be33e95SAlexander Duyck if (iter->pos > 0 && pos >= iter->pos) { 24648315f5d8SStephen Hemminger pos -= iter->pos; 24658be33e95SAlexander Duyck key = iter->key; 24668be33e95SAlexander Duyck } else { 24678315f5d8SStephen Hemminger iter->pos = 0; 24688be33e95SAlexander Duyck key = 0; 24698315f5d8SStephen Hemminger } 24708315f5d8SStephen Hemminger 24718be33e95SAlexander Duyck while ((l = leaf_walk_rcu(tp, key)) != NULL) { 24728be33e95SAlexander Duyck key = l->key + 1; 24738315f5d8SStephen Hemminger iter->pos++; 24748be33e95SAlexander Duyck 247525b97c01SAndy Whitcroft if (--pos <= 0) 24768be33e95SAlexander Duyck break; 24778be33e95SAlexander Duyck 24788be33e95SAlexander Duyck l = NULL; 24798be33e95SAlexander Duyck 24808be33e95SAlexander Duyck /* handle unlikely case of a key wrap */ 24818be33e95SAlexander Duyck if (!key) 24828be33e95SAlexander Duyck break; 24838315f5d8SStephen Hemminger } 24848315f5d8SStephen Hemminger 24858315f5d8SStephen Hemminger if (l) 24868be33e95SAlexander Duyck iter->key = key; /* remember it */ 24878315f5d8SStephen Hemminger else 24888315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 24898315f5d8SStephen Hemminger 24908315f5d8SStephen Hemminger return l; 24918315f5d8SStephen Hemminger } 24928315f5d8SStephen Hemminger 24938315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 24948315f5d8SStephen Hemminger __acquires(RCU) 24958315f5d8SStephen Hemminger { 24968315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 24978315f5d8SStephen Hemminger struct fib_table *tb; 24988be33e95SAlexander Duyck struct trie *t; 24998315f5d8SStephen Hemminger 25008315f5d8SStephen Hemminger rcu_read_lock(); 25018be33e95SAlexander Duyck 25021218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 25038315f5d8SStephen Hemminger if (!tb) 25048315f5d8SStephen Hemminger return NULL; 25058315f5d8SStephen Hemminger 25068be33e95SAlexander Duyck iter->main_tb = tb; 250794d9f1c5SDavid Forster t = (struct trie *)tb->tb_data; 250894d9f1c5SDavid Forster iter->tnode = t->kv; 25098be33e95SAlexander Duyck 25108be33e95SAlexander Duyck if (*pos != 0) 25118be33e95SAlexander Duyck return fib_route_get_idx(iter, *pos); 25128be33e95SAlexander Duyck 25138be33e95SAlexander Duyck iter->pos = 0; 25148be33e95SAlexander Duyck iter->key = 0; 25158be33e95SAlexander Duyck 25168315f5d8SStephen Hemminger return SEQ_START_TOKEN; 25178315f5d8SStephen Hemminger } 25188315f5d8SStephen Hemminger 25198315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25208315f5d8SStephen Hemminger { 25218315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 252235c6edacSAlexander Duyck struct key_vector *l = NULL; 25238be33e95SAlexander Duyck t_key key = iter->key; 25248315f5d8SStephen Hemminger 25258315f5d8SStephen Hemminger ++*pos; 25268be33e95SAlexander Duyck 25278be33e95SAlexander Duyck /* only allow key of 0 for start of sequence */ 25288be33e95SAlexander Duyck if ((v == SEQ_START_TOKEN) || key) 25298be33e95SAlexander Duyck l = leaf_walk_rcu(&iter->tnode, key); 25308be33e95SAlexander Duyck 25318be33e95SAlexander Duyck if (l) { 25328be33e95SAlexander Duyck iter->key = l->key + 1; 25338315f5d8SStephen Hemminger iter->pos++; 25348be33e95SAlexander Duyck } else { 25358be33e95SAlexander Duyck iter->pos = 0; 25368315f5d8SStephen Hemminger } 25378315f5d8SStephen Hemminger 25388315f5d8SStephen Hemminger return l; 25398315f5d8SStephen Hemminger } 25408315f5d8SStephen Hemminger 25418315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 25428315f5d8SStephen Hemminger __releases(RCU) 25438315f5d8SStephen Hemminger { 25448315f5d8SStephen Hemminger rcu_read_unlock(); 25458315f5d8SStephen Hemminger } 25468315f5d8SStephen Hemminger 2547a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2548cb7b593cSStephen Hemminger { 2549a034ee3cSEric Dumazet unsigned int flags = 0; 2550cb7b593cSStephen Hemminger 2551a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2552a034ee3cSEric Dumazet flags = RTF_REJECT; 2553cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2554cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 255532ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2556cb7b593cSStephen Hemminger flags |= RTF_HOST; 2557cb7b593cSStephen Hemminger flags |= RTF_UP; 2558cb7b593cSStephen Hemminger return flags; 2559cb7b593cSStephen Hemminger } 2560cb7b593cSStephen Hemminger 2561cb7b593cSStephen Hemminger /* 2562cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2563cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2564cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2565cb7b593cSStephen Hemminger * legacy utilities 2566cb7b593cSStephen Hemminger */ 2567cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2568cb7b593cSStephen Hemminger { 2569654eff45SAlexander Duyck struct fib_route_iter *iter = seq->private; 2570654eff45SAlexander Duyck struct fib_table *tb = iter->main_tb; 257179e5ad2cSAlexander Duyck struct fib_alias *fa; 257235c6edacSAlexander Duyck struct key_vector *l = v; 25739b6ebad5SAlexander Duyck __be32 prefix; 2574cb7b593cSStephen Hemminger 2575cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2576cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2577cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2578cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2579cb7b593cSStephen Hemminger return 0; 2580cb7b593cSStephen Hemminger } 2581cb7b593cSStephen Hemminger 25829b6ebad5SAlexander Duyck prefix = htonl(l->key); 25839b6ebad5SAlexander Duyck 258479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 25851371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 25869b6ebad5SAlexander Duyck __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); 2587a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2588cb7b593cSStephen Hemminger 258979e5ad2cSAlexander Duyck if ((fa->fa_type == RTN_BROADCAST) || 259079e5ad2cSAlexander Duyck (fa->fa_type == RTN_MULTICAST)) 2591cb7b593cSStephen Hemminger continue; 2592cb7b593cSStephen Hemminger 2593654eff45SAlexander Duyck if (fa->tb_id != tb->tb_id) 2594654eff45SAlexander Duyck continue; 2595654eff45SAlexander Duyck 2596652586dfSTetsuo Handa seq_setwidth(seq, 127); 2597652586dfSTetsuo Handa 2598cb7b593cSStephen Hemminger if (fi) 25995e659e4cSPavel Emelyanov seq_printf(seq, 26005e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2601652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2602cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2603cb7b593cSStephen Hemminger prefix, 2604cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2605cb7b593cSStephen Hemminger fi->fib_priority, 2606cb7b593cSStephen Hemminger mask, 2607a07f5f50SStephen Hemminger (fi->fib_advmss ? 2608a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2609cb7b593cSStephen Hemminger fi->fib_window, 2610652586dfSTetsuo Handa fi->fib_rtt >> 3); 2611cb7b593cSStephen Hemminger else 26125e659e4cSPavel Emelyanov seq_printf(seq, 26135e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2614652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2615cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2616652586dfSTetsuo Handa mask, 0, 0, 0); 2617cb7b593cSStephen Hemminger 2618652586dfSTetsuo Handa seq_pad(seq, '\n'); 2619cb7b593cSStephen Hemminger } 2620cb7b593cSStephen Hemminger 2621cb7b593cSStephen Hemminger return 0; 2622cb7b593cSStephen Hemminger } 2623cb7b593cSStephen Hemminger 2624f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 26258315f5d8SStephen Hemminger .start = fib_route_seq_start, 26268315f5d8SStephen Hemminger .next = fib_route_seq_next, 26278315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2628cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2629cb7b593cSStephen Hemminger }; 2630cb7b593cSStephen Hemminger 2631cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2632cb7b593cSStephen Hemminger { 26331c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 26348315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2635cb7b593cSStephen Hemminger } 2636cb7b593cSStephen Hemminger 26379a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2638cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2639cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2640cb7b593cSStephen Hemminger .read = seq_read, 2641cb7b593cSStephen Hemminger .llseek = seq_lseek, 26421c340b2fSDenis V. Lunev .release = seq_release_net, 2643cb7b593cSStephen Hemminger }; 2644cb7b593cSStephen Hemminger 264561a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 264619baf839SRobert Olsson { 2647d4beaa66SGao feng if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) 2648cb7b593cSStephen Hemminger goto out1; 2649cb7b593cSStephen Hemminger 2650d4beaa66SGao feng if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, 265161a02653SDenis V. Lunev &fib_triestat_fops)) 2652cb7b593cSStephen Hemminger goto out2; 2653cb7b593cSStephen Hemminger 2654d4beaa66SGao feng if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) 2655cb7b593cSStephen Hemminger goto out3; 2656cb7b593cSStephen Hemminger 265719baf839SRobert Olsson return 0; 2658cb7b593cSStephen Hemminger 2659cb7b593cSStephen Hemminger out3: 2660ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2661cb7b593cSStephen Hemminger out2: 2662ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2663cb7b593cSStephen Hemminger out1: 2664cb7b593cSStephen Hemminger return -ENOMEM; 266519baf839SRobert Olsson } 266619baf839SRobert Olsson 266761a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 266819baf839SRobert Olsson { 2669ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2670ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2671ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 267219baf839SRobert Olsson } 267319baf839SRobert Olsson 267419baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2675