119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 541977f032SJiri Slaby #include <linux/bitops.h> 5519baf839SRobert Olsson #include <linux/types.h> 5619baf839SRobert Olsson #include <linux/kernel.h> 5719baf839SRobert Olsson #include <linux/mm.h> 5819baf839SRobert Olsson #include <linux/string.h> 5919baf839SRobert Olsson #include <linux/socket.h> 6019baf839SRobert Olsson #include <linux/sockios.h> 6119baf839SRobert Olsson #include <linux/errno.h> 6219baf839SRobert Olsson #include <linux/in.h> 6319baf839SRobert Olsson #include <linux/inet.h> 64cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6519baf839SRobert Olsson #include <linux/netdevice.h> 6619baf839SRobert Olsson #include <linux/if_arp.h> 6719baf839SRobert Olsson #include <linux/proc_fs.h> 682373ce1cSRobert Olsson #include <linux/rcupdate.h> 6919baf839SRobert Olsson #include <linux/skbuff.h> 7019baf839SRobert Olsson #include <linux/netlink.h> 7119baf839SRobert Olsson #include <linux/init.h> 7219baf839SRobert Olsson #include <linux/list.h> 735a0e3ad6STejun Heo #include <linux/slab.h> 74bc3b2d7fSPaul Gortmaker #include <linux/export.h> 75ffa915d0SDavid S. Miller #include <linux/vmalloc.h> 76457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7719baf839SRobert Olsson #include <net/ip.h> 7819baf839SRobert Olsson #include <net/protocol.h> 7919baf839SRobert Olsson #include <net/route.h> 8019baf839SRobert Olsson #include <net/tcp.h> 8119baf839SRobert Olsson #include <net/sock.h> 8219baf839SRobert Olsson #include <net/ip_fib.h> 838e05fd71SScott Feldman #include <net/switchdev.h> 84f6d3c192SDavid Ahern #include <trace/events/fib.h> 8519baf839SRobert Olsson #include "fib_lookup.h" 8619baf839SRobert Olsson 8706ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8819baf839SRobert Olsson 8919baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 9095f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 9119baf839SRobert Olsson 9219baf839SRobert Olsson typedef unsigned int t_key; 9319baf839SRobert Olsson 9488bae714SAlexander Duyck #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) 9564c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 9664c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 972373ce1cSRobert Olsson 9835c6edacSAlexander Duyck struct key_vector { 9941b489fdSAlexander Duyck t_key key; 10041b489fdSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 10141b489fdSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 10241b489fdSAlexander Duyck unsigned char slen; 10341b489fdSAlexander Duyck union { 10441b489fdSAlexander Duyck /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ 10579e5ad2cSAlexander Duyck struct hlist_head leaf; 10641b489fdSAlexander Duyck /* This array is valid if (pos | bits) > 0 (TNODE) */ 10735c6edacSAlexander Duyck struct key_vector __rcu *tnode[0]; 10819baf839SRobert Olsson }; 109adaf9816SAlexander Duyck }; 11019baf839SRobert Olsson 111dc35dbedSAlexander Duyck struct tnode { 11256ca2adfSAlexander Duyck struct rcu_head rcu; 1136e22d174SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 1146e22d174SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 115f23e59fbSAlexander Duyck struct key_vector __rcu *parent; 116dc35dbedSAlexander Duyck struct key_vector kv[1]; 11756ca2adfSAlexander Duyck #define tn_bits kv[0].bits 118dc35dbedSAlexander Duyck }; 119dc35dbedSAlexander Duyck 120dc35dbedSAlexander Duyck #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) 12141b489fdSAlexander Duyck #define LEAF_SIZE TNODE_SIZE(1) 12241b489fdSAlexander Duyck 12319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 12419baf839SRobert Olsson struct trie_use_stats { 12519baf839SRobert Olsson unsigned int gets; 12619baf839SRobert Olsson unsigned int backtrack; 12719baf839SRobert Olsson unsigned int semantic_match_passed; 12819baf839SRobert Olsson unsigned int semantic_match_miss; 12919baf839SRobert Olsson unsigned int null_node_hit; 1302f36895aSRobert Olsson unsigned int resize_node_skipped; 13119baf839SRobert Olsson }; 13219baf839SRobert Olsson #endif 13319baf839SRobert Olsson 13419baf839SRobert Olsson struct trie_stat { 13519baf839SRobert Olsson unsigned int totdepth; 13619baf839SRobert Olsson unsigned int maxdepth; 13719baf839SRobert Olsson unsigned int tnodes; 13819baf839SRobert Olsson unsigned int leaves; 13919baf839SRobert Olsson unsigned int nullpointers; 14093672292SStephen Hemminger unsigned int prefixes; 14106ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 14219baf839SRobert Olsson }; 14319baf839SRobert Olsson 14419baf839SRobert Olsson struct trie { 14588bae714SAlexander Duyck struct key_vector kv[1]; 14619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1478274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 14819baf839SRobert Olsson #endif 14919baf839SRobert Olsson }; 15019baf839SRobert Olsson 15188bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn); 152c3059477SJarek Poplawski static size_t tnode_free_size; 153c3059477SJarek Poplawski 154c3059477SJarek Poplawski /* 155c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 156c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 157c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 158c3059477SJarek Poplawski */ 159c3059477SJarek Poplawski static const int sync_pages = 128; 16019baf839SRobert Olsson 161e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 162bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 16319baf839SRobert Olsson 16456ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv) 16556ca2adfSAlexander Duyck { 16656ca2adfSAlexander Duyck return container_of(kv, struct tnode, kv[0]); 16756ca2adfSAlexander Duyck } 16856ca2adfSAlexander Duyck 16964c9b6fbSAlexander Duyck /* caller must hold RTNL */ 170f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) 171754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) 1720a5c0475SEric Dumazet 17364c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 174f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) 175754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) 1760a5c0475SEric Dumazet 17764c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 17835c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) 17906801916SStephen Hemminger { 180adaf9816SAlexander Duyck if (n) 181f23e59fbSAlexander Duyck rcu_assign_pointer(tn_info(n)->parent, tp); 18264c9b6fbSAlexander Duyck } 18364c9b6fbSAlexander Duyck 184f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) 18564c9b6fbSAlexander Duyck 18664c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 18764c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 18864c9b6fbSAlexander Duyck */ 1892e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn) 19064c9b6fbSAlexander Duyck { 19164c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 19206801916SStephen Hemminger } 1932373ce1cSRobert Olsson 19488bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) 19588bae714SAlexander Duyck 1962e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv) 1972e1ac88aSAlexander Duyck { 1982e1ac88aSAlexander Duyck unsigned long index = key ^ kv->key; 1992e1ac88aSAlexander Duyck 20088bae714SAlexander Duyck if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) 20188bae714SAlexander Duyck return 0; 20288bae714SAlexander Duyck 2032e1ac88aSAlexander Duyck return index >> kv->pos; 2042e1ac88aSAlexander Duyck } 2052e1ac88aSAlexander Duyck 206e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 207e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 208e9b44019SAlexander Duyck * all of the bits in that key are significant. 209e9b44019SAlexander Duyck * 210e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 211e9b44019SAlexander Duyck * 212e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 213e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 214e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 215e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 216e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 217e9b44019SAlexander Duyck * correct key path. 218e9b44019SAlexander Duyck * 219e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 220e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 221e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 222e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 223e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 224e9b44019SAlexander Duyck * 225e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 226e9b44019SAlexander Duyck * have many different meanings. 227e9b44019SAlexander Duyck * 228e9b44019SAlexander Duyck * Example: 229e9b44019SAlexander Duyck * _________________________________________________________________ 230e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 231e9b44019SAlexander Duyck * ----------------------------------------------------------------- 232e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 233e9b44019SAlexander Duyck * 234e9b44019SAlexander Duyck * _________________________________________________________________ 235e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 236e9b44019SAlexander Duyck * ----------------------------------------------------------------- 237e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 238e9b44019SAlexander Duyck * 239e9b44019SAlexander Duyck * tp->pos = 22 240e9b44019SAlexander Duyck * tp->bits = 3 241e9b44019SAlexander Duyck * n->pos = 13 242e9b44019SAlexander Duyck * n->bits = 4 243e9b44019SAlexander Duyck * 244e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 245e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 246e9b44019SAlexander Duyck * point we do not use them for anything. 247e9b44019SAlexander Duyck * 248e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 249e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 250e9b44019SAlexander Duyck * 'n' among tp's children. 251e9b44019SAlexander Duyck * 252e9b44019SAlexander Duyck * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits 253e9b44019SAlexander Duyck * for the node n. 254e9b44019SAlexander Duyck * 255e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 256e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 257e9b44019SAlexander Duyck * 258e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 259e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 260e9b44019SAlexander Duyck * 261e9b44019SAlexander Duyck * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown 262e9b44019SAlexander Duyck * at this point. 26319baf839SRobert Olsson */ 26419baf839SRobert Olsson 265f5026fabSDenis V. Lunev static const int halve_threshold = 25; 266f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 267345aa031SJarek Poplawski static const int halve_threshold_root = 15; 26880b71b80SJens Låås static const int inflate_threshold_root = 30; 2692373ce1cSRobert Olsson 2702373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 2712373ce1cSRobert Olsson { 2722373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 2732373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 2742373ce1cSRobert Olsson } 2752373ce1cSRobert Olsson 2762373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 2772373ce1cSRobert Olsson { 2782373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 2792373ce1cSRobert Olsson } 2802373ce1cSRobert Olsson 28137fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 28235c6edacSAlexander Duyck ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 2831de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \ 28435c6edacSAlexander Duyck ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 28537fd30f2SAlexander Duyck 28637fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 2872373ce1cSRobert Olsson { 28856ca2adfSAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 28937fd30f2SAlexander Duyck 29056ca2adfSAlexander Duyck if (!n->tn_bits) 29137fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 29256ca2adfSAlexander Duyck else if (n->tn_bits <= TNODE_KMALLOC_MAX) 29337fd30f2SAlexander Duyck kfree(n); 29437fd30f2SAlexander Duyck else 29537fd30f2SAlexander Duyck vfree(n); 2962373ce1cSRobert Olsson } 2972373ce1cSRobert Olsson 29856ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 299387a5487SStephen Hemminger 300dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits) 3012373ce1cSRobert Olsson { 3021de3d87bSAlexander Duyck size_t size; 3031de3d87bSAlexander Duyck 3041de3d87bSAlexander Duyck /* verify bits is within bounds */ 3051de3d87bSAlexander Duyck if (bits > TNODE_VMALLOC_MAX) 3061de3d87bSAlexander Duyck return NULL; 3071de3d87bSAlexander Duyck 3081de3d87bSAlexander Duyck /* determine size and verify it is non-zero and didn't overflow */ 3091de3d87bSAlexander Duyck size = TNODE_SIZE(1ul << bits); 3101de3d87bSAlexander Duyck 3112373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3128d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 31315be75cdSStephen Hemminger else 3147a1c8e5aSEric Dumazet return vzalloc(size); 31515be75cdSStephen Hemminger } 3162373ce1cSRobert Olsson 31735c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n) 31895f60ea3SAlexander Duyck { 3196e22d174SAlexander Duyck ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; 32095f60ea3SAlexander Duyck } 32195f60ea3SAlexander Duyck 32235c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n) 32395f60ea3SAlexander Duyck { 3246e22d174SAlexander Duyck tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; 32595f60ea3SAlexander Duyck } 32695f60ea3SAlexander Duyck 32735c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 32819baf839SRobert Olsson { 329f38b24c9SFiro Yang struct key_vector *l; 330f38b24c9SFiro Yang struct tnode *kv; 331dc35dbedSAlexander Duyck 332f38b24c9SFiro Yang kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 333dc35dbedSAlexander Duyck if (!kv) 334dc35dbedSAlexander Duyck return NULL; 335dc35dbedSAlexander Duyck 336dc35dbedSAlexander Duyck /* initialize key vector */ 337f38b24c9SFiro Yang l = kv->kv; 33864c9b6fbSAlexander Duyck l->key = key; 339e9b44019SAlexander Duyck l->pos = 0; 34064c9b6fbSAlexander Duyck l->bits = 0; 341dc35dbedSAlexander Duyck l->slen = fa->fa_slen; 34264c9b6fbSAlexander Duyck 343d5d6487cSAlexander Duyck /* link leaf to fib alias */ 34479e5ad2cSAlexander Duyck INIT_HLIST_HEAD(&l->leaf); 345d5d6487cSAlexander Duyck hlist_add_head(&fa->fa_list, &l->leaf); 346dc35dbedSAlexander Duyck 34719baf839SRobert Olsson return l; 34819baf839SRobert Olsson } 34919baf839SRobert Olsson 35035c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits) 35119baf839SRobert Olsson { 35264c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 353f38b24c9SFiro Yang struct key_vector *tn; 354f38b24c9SFiro Yang struct tnode *tnode; 35564c9b6fbSAlexander Duyck 35664c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 35764c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 35819baf839SRobert Olsson 359f38b24c9SFiro Yang tnode = tnode_alloc(bits); 360dc35dbedSAlexander Duyck if (!tnode) 361dc35dbedSAlexander Duyck return NULL; 362dc35dbedSAlexander Duyck 363f38b24c9SFiro Yang pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 364f38b24c9SFiro Yang sizeof(struct key_vector *) << bits); 365f38b24c9SFiro Yang 36695f60ea3SAlexander Duyck if (bits == KEYLENGTH) 3676e22d174SAlexander Duyck tnode->full_children = 1; 36895f60ea3SAlexander Duyck else 3696e22d174SAlexander Duyck tnode->empty_children = 1ul << bits; 370c877efb2SStephen Hemminger 371f38b24c9SFiro Yang tn = tnode->kv; 372dc35dbedSAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 373dc35dbedSAlexander Duyck tn->pos = pos; 374dc35dbedSAlexander Duyck tn->bits = bits; 375dc35dbedSAlexander Duyck tn->slen = pos; 376dc35dbedSAlexander Duyck 37719baf839SRobert Olsson return tn; 37819baf839SRobert Olsson } 37919baf839SRobert Olsson 380e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 38119baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 38219baf839SRobert Olsson */ 38335c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n) 38419baf839SRobert Olsson { 385e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 38619baf839SRobert Olsson } 38719baf839SRobert Olsson 388ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 38919baf839SRobert Olsson * Update the value of full_children and empty_children. 39019baf839SRobert Olsson */ 39135c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i, 39235c6edacSAlexander Duyck struct key_vector *n) 39319baf839SRobert Olsson { 394754baf8dSAlexander Duyck struct key_vector *chi = get_child(tn, i); 395ff181ed8SAlexander Duyck int isfull, wasfull; 39619baf839SRobert Olsson 3972e1ac88aSAlexander Duyck BUG_ON(i >= child_length(tn)); 3980c7770c7SStephen Hemminger 39995f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 40000db4124SIan Morris if (!n && chi) 40195f60ea3SAlexander Duyck empty_child_inc(tn); 40200db4124SIan Morris if (n && !chi) 40395f60ea3SAlexander Duyck empty_child_dec(tn); 40419baf839SRobert Olsson 40519baf839SRobert Olsson /* update fullChildren */ 40619baf839SRobert Olsson wasfull = tnode_full(tn, chi); 40719baf839SRobert Olsson isfull = tnode_full(tn, n); 408ff181ed8SAlexander Duyck 40919baf839SRobert Olsson if (wasfull && !isfull) 4106e22d174SAlexander Duyck tn_info(tn)->full_children--; 41119baf839SRobert Olsson else if (!wasfull && isfull) 4126e22d174SAlexander Duyck tn_info(tn)->full_children++; 41391b9a277SOlof Johansson 4145405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4155405afd1SAlexander Duyck tn->slen = n->slen; 4165405afd1SAlexander Duyck 41741b489fdSAlexander Duyck rcu_assign_pointer(tn->tnode[i], n); 41819baf839SRobert Olsson } 41919baf839SRobert Olsson 42035c6edacSAlexander Duyck static void update_children(struct key_vector *tn) 42169fa57b1SAlexander Duyck { 42269fa57b1SAlexander Duyck unsigned long i; 42369fa57b1SAlexander Duyck 42469fa57b1SAlexander Duyck /* update all of the child parent pointers */ 4252e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 426754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 42769fa57b1SAlexander Duyck 42869fa57b1SAlexander Duyck if (!inode) 42969fa57b1SAlexander Duyck continue; 43069fa57b1SAlexander Duyck 43169fa57b1SAlexander Duyck /* Either update the children of a tnode that 43269fa57b1SAlexander Duyck * already belongs to us or update the child 43369fa57b1SAlexander Duyck * to point to ourselves. 43469fa57b1SAlexander Duyck */ 43569fa57b1SAlexander Duyck if (node_parent(inode) == tn) 43669fa57b1SAlexander Duyck update_children(inode); 43769fa57b1SAlexander Duyck else 43869fa57b1SAlexander Duyck node_set_parent(inode, tn); 43969fa57b1SAlexander Duyck } 44069fa57b1SAlexander Duyck } 44169fa57b1SAlexander Duyck 44288bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key, 44388bae714SAlexander Duyck struct key_vector *n) 444836a0123SAlexander Duyck { 44588bae714SAlexander Duyck if (IS_TRIE(tp)) 44688bae714SAlexander Duyck rcu_assign_pointer(tp->tnode[0], n); 447836a0123SAlexander Duyck else 44888bae714SAlexander Duyck put_child(tp, get_index(key, tp), n); 449836a0123SAlexander Duyck } 450836a0123SAlexander Duyck 45135c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn) 4520a5c0475SEric Dumazet { 45356ca2adfSAlexander Duyck tn_info(tn)->rcu.next = NULL; 4540a5c0475SEric Dumazet } 455fc86a93bSAlexander Duyck 45635c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn, 45735c6edacSAlexander Duyck struct key_vector *n) 458fc86a93bSAlexander Duyck { 45956ca2adfSAlexander Duyck tn_info(n)->rcu.next = tn_info(tn)->rcu.next; 46056ca2adfSAlexander Duyck tn_info(tn)->rcu.next = &tn_info(n)->rcu; 461fc86a93bSAlexander Duyck } 462fc86a93bSAlexander Duyck 46335c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn) 464fc86a93bSAlexander Duyck { 46556ca2adfSAlexander Duyck struct callback_head *head = &tn_info(tn)->rcu; 466fc86a93bSAlexander Duyck 467fc86a93bSAlexander Duyck while (head) { 468fc86a93bSAlexander Duyck head = head->next; 46941b489fdSAlexander Duyck tnode_free_size += TNODE_SIZE(1ul << tn->bits); 47037fd30f2SAlexander Duyck node_free(tn); 471fc86a93bSAlexander Duyck 47256ca2adfSAlexander Duyck tn = container_of(head, struct tnode, rcu)->kv; 473fc86a93bSAlexander Duyck } 474fc86a93bSAlexander Duyck 475fc86a93bSAlexander Duyck if (tnode_free_size >= PAGE_SIZE * sync_pages) { 476fc86a93bSAlexander Duyck tnode_free_size = 0; 477fc86a93bSAlexander Duyck synchronize_rcu(); 478fc86a93bSAlexander Duyck } 4790a5c0475SEric Dumazet } 4800a5c0475SEric Dumazet 48188bae714SAlexander Duyck static struct key_vector *replace(struct trie *t, 48235c6edacSAlexander Duyck struct key_vector *oldtnode, 48335c6edacSAlexander Duyck struct key_vector *tn) 48469fa57b1SAlexander Duyck { 48535c6edacSAlexander Duyck struct key_vector *tp = node_parent(oldtnode); 48669fa57b1SAlexander Duyck unsigned long i; 48769fa57b1SAlexander Duyck 48869fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 48969fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 49088bae714SAlexander Duyck put_child_root(tp, tn->key, tn); 49169fa57b1SAlexander Duyck 49269fa57b1SAlexander Duyck /* update all of the child parent pointers */ 49369fa57b1SAlexander Duyck update_children(tn); 49469fa57b1SAlexander Duyck 49569fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 49669fa57b1SAlexander Duyck tnode_free(oldtnode); 49769fa57b1SAlexander Duyck 49869fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 4992e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 500754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 50169fa57b1SAlexander Duyck 50269fa57b1SAlexander Duyck /* resize child node */ 50369fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 50488bae714SAlexander Duyck tn = resize(t, inode); 50569fa57b1SAlexander Duyck } 5068d8e810cSAlexander Duyck 50788bae714SAlexander Duyck return tp; 50869fa57b1SAlexander Duyck } 50969fa57b1SAlexander Duyck 51088bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t, 51135c6edacSAlexander Duyck struct key_vector *oldtnode) 51219baf839SRobert Olsson { 51335c6edacSAlexander Duyck struct key_vector *tn; 51469fa57b1SAlexander Duyck unsigned long i; 515e9b44019SAlexander Duyck t_key m; 51619baf839SRobert Olsson 5170c7770c7SStephen Hemminger pr_debug("In inflate\n"); 51819baf839SRobert Olsson 519e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5202f80b3c8SRobert Olsson if (!tn) 5218d8e810cSAlexander Duyck goto notnode; 5222f36895aSRobert Olsson 52369fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 52469fa57b1SAlexander Duyck tnode_free_init(oldtnode); 52569fa57b1SAlexander Duyck 52612c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 52712c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 52812c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 52912c081a5SAlexander Duyck * nodes. 5302f36895aSRobert Olsson */ 5312e1ac88aSAlexander Duyck for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { 532754baf8dSAlexander Duyck struct key_vector *inode = get_child(oldtnode, --i); 53335c6edacSAlexander Duyck struct key_vector *node0, *node1; 53469fa57b1SAlexander Duyck unsigned long j, k; 53519baf839SRobert Olsson 53619baf839SRobert Olsson /* An empty child */ 53751456b29SIan Morris if (!inode) 53819baf839SRobert Olsson continue; 53919baf839SRobert Olsson 54019baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 541adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 542e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 54319baf839SRobert Olsson continue; 54419baf839SRobert Olsson } 54519baf839SRobert Olsson 54669fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 54769fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 54869fa57b1SAlexander Duyck 54919baf839SRobert Olsson /* An internal node with two children */ 55019baf839SRobert Olsson if (inode->bits == 1) { 551754baf8dSAlexander Duyck put_child(tn, 2 * i + 1, get_child(inode, 1)); 552754baf8dSAlexander Duyck put_child(tn, 2 * i, get_child(inode, 0)); 55391b9a277SOlof Johansson continue; 55419baf839SRobert Olsson } 55519baf839SRobert Olsson 55619baf839SRobert Olsson /* We will replace this node 'inode' with two new 55712c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 55819baf839SRobert Olsson * original children. The two new nodes will have 55919baf839SRobert Olsson * a position one bit further down the key and this 56019baf839SRobert Olsson * means that the "significant" part of their keys 56119baf839SRobert Olsson * (see the discussion near the top of this file) 56219baf839SRobert Olsson * will differ by one bit, which will be "0" in 56312c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 56419baf839SRobert Olsson * moving the key position by one step, the bit that 56519baf839SRobert Olsson * we are moving away from - the bit at position 56612c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 56712c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 56819baf839SRobert Olsson * two new keys. 56919baf839SRobert Olsson */ 57012c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 57112c081a5SAlexander Duyck if (!node1) 57212c081a5SAlexander Duyck goto nomem; 57369fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 57419baf839SRobert Olsson 57569fa57b1SAlexander Duyck tnode_free_append(tn, node1); 57612c081a5SAlexander Duyck if (!node0) 57712c081a5SAlexander Duyck goto nomem; 57812c081a5SAlexander Duyck tnode_free_append(tn, node0); 5792f36895aSRobert Olsson 58012c081a5SAlexander Duyck /* populate child pointers in new nodes */ 5812e1ac88aSAlexander Duyck for (k = child_length(inode), j = k / 2; j;) { 582754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 583754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 584754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 585754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 58619baf839SRobert Olsson } 587ff181ed8SAlexander Duyck 58812c081a5SAlexander Duyck /* link new nodes to parent */ 58912c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 59012c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 59112c081a5SAlexander Duyck 59212c081a5SAlexander Duyck /* link parent to nodes */ 59312c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 59412c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 59512c081a5SAlexander Duyck } 59612c081a5SAlexander Duyck 59769fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 5988d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 599ff181ed8SAlexander Duyck nomem: 600fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 601fc86a93bSAlexander Duyck tnode_free(tn); 6028d8e810cSAlexander Duyck notnode: 6038d8e810cSAlexander Duyck return NULL; 604ff181ed8SAlexander Duyck } 605ff181ed8SAlexander Duyck 60688bae714SAlexander Duyck static struct key_vector *halve(struct trie *t, 60735c6edacSAlexander Duyck struct key_vector *oldtnode) 60819baf839SRobert Olsson { 60935c6edacSAlexander Duyck struct key_vector *tn; 61012c081a5SAlexander Duyck unsigned long i; 61119baf839SRobert Olsson 6120c7770c7SStephen Hemminger pr_debug("In halve\n"); 61319baf839SRobert Olsson 614e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6152f80b3c8SRobert Olsson if (!tn) 6168d8e810cSAlexander Duyck goto notnode; 6172f36895aSRobert Olsson 61869fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 61969fa57b1SAlexander Duyck tnode_free_init(oldtnode); 62069fa57b1SAlexander Duyck 62112c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 62212c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 62312c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 62412c081a5SAlexander Duyck * nodes. 6252f36895aSRobert Olsson */ 6262e1ac88aSAlexander Duyck for (i = child_length(oldtnode); i;) { 627754baf8dSAlexander Duyck struct key_vector *node1 = get_child(oldtnode, --i); 628754baf8dSAlexander Duyck struct key_vector *node0 = get_child(oldtnode, --i); 62935c6edacSAlexander Duyck struct key_vector *inode; 6302f36895aSRobert Olsson 63112c081a5SAlexander Duyck /* At least one of the children is empty */ 63212c081a5SAlexander Duyck if (!node1 || !node0) { 63312c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 63412c081a5SAlexander Duyck continue; 63512c081a5SAlexander Duyck } 6362f36895aSRobert Olsson 6372f36895aSRobert Olsson /* Two nonempty children */ 63812c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 6398d8e810cSAlexander Duyck if (!inode) 6408d8e810cSAlexander Duyck goto nomem; 64112c081a5SAlexander Duyck tnode_free_append(tn, inode); 6422f80b3c8SRobert Olsson 64312c081a5SAlexander Duyck /* initialize pointers out of node */ 64412c081a5SAlexander Duyck put_child(inode, 1, node1); 64512c081a5SAlexander Duyck put_child(inode, 0, node0); 64612c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 64712c081a5SAlexander Duyck 64812c081a5SAlexander Duyck /* link parent to node */ 64912c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6502f36895aSRobert Olsson } 6512f36895aSRobert Olsson 65269fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6538d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 6548d8e810cSAlexander Duyck nomem: 6558d8e810cSAlexander Duyck /* all pointers should be clean so we are done */ 6568d8e810cSAlexander Duyck tnode_free(tn); 6578d8e810cSAlexander Duyck notnode: 6588d8e810cSAlexander Duyck return NULL; 6592f80b3c8SRobert Olsson } 66019baf839SRobert Olsson 66188bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t, 66288bae714SAlexander Duyck struct key_vector *oldtnode) 66395f60ea3SAlexander Duyck { 66435c6edacSAlexander Duyck struct key_vector *n, *tp; 66595f60ea3SAlexander Duyck unsigned long i; 66695f60ea3SAlexander Duyck 66795f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 6682e1ac88aSAlexander Duyck for (n = NULL, i = child_length(oldtnode); !n && i;) 669754baf8dSAlexander Duyck n = get_child(oldtnode, --i); 67095f60ea3SAlexander Duyck 67195f60ea3SAlexander Duyck /* compress one level */ 67295f60ea3SAlexander Duyck tp = node_parent(oldtnode); 67388bae714SAlexander Duyck put_child_root(tp, oldtnode->key, n); 67495f60ea3SAlexander Duyck node_set_parent(n, tp); 67595f60ea3SAlexander Duyck 67695f60ea3SAlexander Duyck /* drop dead node */ 67795f60ea3SAlexander Duyck node_free(oldtnode); 67888bae714SAlexander Duyck 67988bae714SAlexander Duyck return tp; 68095f60ea3SAlexander Duyck } 68195f60ea3SAlexander Duyck 68235c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn) 6835405afd1SAlexander Duyck { 6845405afd1SAlexander Duyck unsigned char slen = tn->pos; 6855405afd1SAlexander Duyck unsigned long stride, i; 6865405afd1SAlexander Duyck 6875405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 6885405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 6895405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 6905405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 6915405afd1SAlexander Duyck */ 6922e1ac88aSAlexander Duyck for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { 693754baf8dSAlexander Duyck struct key_vector *n = get_child(tn, i); 6945405afd1SAlexander Duyck 6955405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 6965405afd1SAlexander Duyck continue; 6975405afd1SAlexander Duyck 6985405afd1SAlexander Duyck /* update stride and slen based on new value */ 6995405afd1SAlexander Duyck stride <<= (n->slen - slen); 7005405afd1SAlexander Duyck slen = n->slen; 7015405afd1SAlexander Duyck i &= ~(stride - 1); 7025405afd1SAlexander Duyck 7035405afd1SAlexander Duyck /* if slen covers all but the last bit we can stop here 7045405afd1SAlexander Duyck * there will be nothing longer than that since only node 7055405afd1SAlexander Duyck * 0 and 1 << (bits - 1) could have that as their suffix 7065405afd1SAlexander Duyck * length. 7075405afd1SAlexander Duyck */ 7085405afd1SAlexander Duyck if ((slen + 1) >= (tn->pos + tn->bits)) 7095405afd1SAlexander Duyck break; 7105405afd1SAlexander Duyck } 7115405afd1SAlexander Duyck 7125405afd1SAlexander Duyck tn->slen = slen; 7135405afd1SAlexander Duyck 7145405afd1SAlexander Duyck return slen; 7155405afd1SAlexander Duyck } 7165405afd1SAlexander Duyck 717f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 718cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 719cf3637bbSAlexander Duyck * Telecommunications, page 6: 720cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 721cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 722cf3637bbSAlexander Duyck * 723cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 724cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 7252e1ac88aSAlexander Duyck * child_length() and instead of multiplying by 2 (since the 726cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 727cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 728cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 729cf3637bbSAlexander Duyck * 7302e1ac88aSAlexander Duyck * The left-hand side may look a bit weird: child_length(tn) 731cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 732cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 733cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 734cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 735cf3637bbSAlexander Duyck * we just count them one extra time here. 736cf3637bbSAlexander Duyck * 737cf3637bbSAlexander Duyck * A clearer way to write this would be: 738cf3637bbSAlexander Duyck * 739cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 7402e1ac88aSAlexander Duyck * not_to_be_doubled = child_length(tn) - tn->empty_children - 741cf3637bbSAlexander Duyck * tn->full_children; 742cf3637bbSAlexander Duyck * 7432e1ac88aSAlexander Duyck * new_child_length = child_length(tn) * 2; 744cf3637bbSAlexander Duyck * 745cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 746cf3637bbSAlexander Duyck * new_child_length; 747cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 748cf3637bbSAlexander Duyck * 749cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 750cf3637bbSAlexander Duyck * 751cf3637bbSAlexander Duyck * anyway, 752cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 753cf3637bbSAlexander Duyck * inflate_threshold 754cf3637bbSAlexander Duyck * 755cf3637bbSAlexander Duyck * avoid a division: 756cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 757cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 758cf3637bbSAlexander Duyck * 759cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 7602e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 761cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 762cf3637bbSAlexander Duyck * 763cf3637bbSAlexander Duyck * expand new_child_length: 7642e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 765cf3637bbSAlexander Duyck * tn->full_children) >= 7662e1ac88aSAlexander Duyck * inflate_threshold * child_length(tn) * 2 767cf3637bbSAlexander Duyck * 768cf3637bbSAlexander Duyck * shorten again: 7692e1ac88aSAlexander Duyck * 50 * (tn->full_children + child_length(tn) - 770cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 7712e1ac88aSAlexander Duyck * child_length(tn) 772cf3637bbSAlexander Duyck * 773cf3637bbSAlexander Duyck */ 77435c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) 775f05a4819SAlexander Duyck { 7762e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 777f05a4819SAlexander Duyck unsigned long threshold = used; 778cf3637bbSAlexander Duyck 779cf3637bbSAlexander Duyck /* Keep root node larger */ 78088bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; 7816e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 7826e22d174SAlexander Duyck used += tn_info(tn)->full_children; 783cf3637bbSAlexander Duyck 78495f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 78595f60ea3SAlexander Duyck 78695f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 787cf3637bbSAlexander Duyck } 788cf3637bbSAlexander Duyck 78935c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) 790f05a4819SAlexander Duyck { 7912e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 792f05a4819SAlexander Duyck unsigned long threshold = used; 793cf3637bbSAlexander Duyck 794f05a4819SAlexander Duyck /* Keep root node larger */ 79588bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; 7966e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 797f05a4819SAlexander Duyck 79895f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 79995f60ea3SAlexander Duyck 80095f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 80195f60ea3SAlexander Duyck } 80295f60ea3SAlexander Duyck 80335c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn) 80495f60ea3SAlexander Duyck { 8052e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 80695f60ea3SAlexander Duyck 8076e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 80895f60ea3SAlexander Duyck 80995f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 8106e22d174SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) 81195f60ea3SAlexander Duyck used -= KEY_MAX; 81295f60ea3SAlexander Duyck 81395f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 81495f60ea3SAlexander Duyck return used < 2; 815f05a4819SAlexander Duyck } 816f05a4819SAlexander Duyck 817f05a4819SAlexander Duyck #define MAX_WORK 10 81888bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn) 819f05a4819SAlexander Duyck { 8208d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8218d8e810cSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 8228d8e810cSAlexander Duyck #endif 82335c6edacSAlexander Duyck struct key_vector *tp = node_parent(tn); 82488bae714SAlexander Duyck unsigned long cindex = get_index(tn->key, tp); 825a80e89d4SAlexander Duyck int max_work = MAX_WORK; 826f05a4819SAlexander Duyck 827f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 828f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 829f05a4819SAlexander Duyck 830ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 831ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 832ff181ed8SAlexander Duyck * thing without us interfering 833ff181ed8SAlexander Duyck */ 83488bae714SAlexander Duyck BUG_ON(tn != get_child(tp, cindex)); 835ff181ed8SAlexander Duyck 836f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 837f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 838f05a4819SAlexander Duyck */ 839b6f15f82SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 84088bae714SAlexander Duyck tp = inflate(t, tn); 84188bae714SAlexander Duyck if (!tp) { 842cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8438d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 844cf3637bbSAlexander Duyck #endif 845cf3637bbSAlexander Duyck break; 846cf3637bbSAlexander Duyck } 847ff181ed8SAlexander Duyck 848b6f15f82SAlexander Duyck max_work--; 84988bae714SAlexander Duyck tn = get_child(tp, cindex); 850cf3637bbSAlexander Duyck } 851cf3637bbSAlexander Duyck 852b6f15f82SAlexander Duyck /* update parent in case inflate failed */ 853b6f15f82SAlexander Duyck tp = node_parent(tn); 854b6f15f82SAlexander Duyck 855cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 856cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 857b6f15f82SAlexander Duyck return tp; 858cf3637bbSAlexander Duyck 859f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 860cf3637bbSAlexander Duyck * node is above threshold. 861cf3637bbSAlexander Duyck */ 862b6f15f82SAlexander Duyck while (should_halve(tp, tn) && max_work) { 86388bae714SAlexander Duyck tp = halve(t, tn); 86488bae714SAlexander Duyck if (!tp) { 865cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8668d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 867cf3637bbSAlexander Duyck #endif 868cf3637bbSAlexander Duyck break; 869cf3637bbSAlexander Duyck } 870cf3637bbSAlexander Duyck 871b6f15f82SAlexander Duyck max_work--; 87288bae714SAlexander Duyck tn = get_child(tp, cindex); 873ff181ed8SAlexander Duyck } 874cf3637bbSAlexander Duyck 875cf3637bbSAlexander Duyck /* Only one child remains */ 87688bae714SAlexander Duyck if (should_collapse(tn)) 87788bae714SAlexander Duyck return collapse(t, tn); 87888bae714SAlexander Duyck 879b6f15f82SAlexander Duyck /* update parent in case halve failed */ 88088bae714SAlexander Duyck tp = node_parent(tn); 8815405afd1SAlexander Duyck 8825405afd1SAlexander Duyck /* Return if at least one deflate was run */ 8835405afd1SAlexander Duyck if (max_work != MAX_WORK) 88488bae714SAlexander Duyck return tp; 8855405afd1SAlexander Duyck 8865405afd1SAlexander Duyck /* push the suffix length to the parent node */ 8875405afd1SAlexander Duyck if (tn->slen > tn->pos) { 8885405afd1SAlexander Duyck unsigned char slen = update_suffix(tn); 8895405afd1SAlexander Duyck 89088bae714SAlexander Duyck if (slen > tp->slen) 8915405afd1SAlexander Duyck tp->slen = slen; 892cf3637bbSAlexander Duyck } 8938d8e810cSAlexander Duyck 89488bae714SAlexander Duyck return tp; 895cf3637bbSAlexander Duyck } 896cf3637bbSAlexander Duyck 89735c6edacSAlexander Duyck static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) 89819baf839SRobert Olsson { 89988bae714SAlexander Duyck while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { 9005405afd1SAlexander Duyck if (update_suffix(tp) > l->slen) 9015405afd1SAlexander Duyck break; 9025405afd1SAlexander Duyck tp = node_parent(tp); 9035405afd1SAlexander Duyck } 9045405afd1SAlexander Duyck } 9055405afd1SAlexander Duyck 90635c6edacSAlexander Duyck static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) 9075405afd1SAlexander Duyck { 9085405afd1SAlexander Duyck /* if this is a new leaf then tn will be NULL and we can sort 9095405afd1SAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 9105405afd1SAlexander Duyck */ 91188bae714SAlexander Duyck while (tn->slen < l->slen) { 9125405afd1SAlexander Duyck tn->slen = l->slen; 9135405afd1SAlexander Duyck tn = node_parent(tn); 9145405afd1SAlexander Duyck } 9155405afd1SAlexander Duyck } 9165405afd1SAlexander Duyck 9172373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 91835c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t, 91935c6edacSAlexander Duyck struct key_vector **tp, u32 key) 92019baf839SRobert Olsson { 92188bae714SAlexander Duyck struct key_vector *pn, *n = t->kv; 92288bae714SAlexander Duyck unsigned long index = 0; 92319baf839SRobert Olsson 92488bae714SAlexander Duyck do { 92588bae714SAlexander Duyck pn = n; 92688bae714SAlexander Duyck n = get_child_rcu(n, index); 92788bae714SAlexander Duyck 92888bae714SAlexander Duyck if (!n) 92988bae714SAlexander Duyck break; 93088bae714SAlexander Duyck 93188bae714SAlexander Duyck index = get_cindex(key, n); 93219baf839SRobert Olsson 933939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 934939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 935939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 936939afb06SAlexander Duyck * is the difference between the key and this value. From 937939afb06SAlexander Duyck * this we can actually derive several pieces of data. 938d4a975e8SAlexander Duyck * if (index >= (1ul << bits)) 939939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 940b3832117SAlexander Duyck * else 941b3832117SAlexander Duyck * we know the value is cindex 942d4a975e8SAlexander Duyck * 943d4a975e8SAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 944d4a975e8SAlexander Duyck * fact that we can only allocate a node with 32 bits if a 945d4a975e8SAlexander Duyck * long is greater than 32 bits. 946939afb06SAlexander Duyck */ 947d4a975e8SAlexander Duyck if (index >= (1ul << n->bits)) { 948d4a975e8SAlexander Duyck n = NULL; 949d4a975e8SAlexander Duyck break; 950d4a975e8SAlexander Duyck } 951939afb06SAlexander Duyck 95288bae714SAlexander Duyck /* keep searching until we find a perfect match leaf or NULL */ 95388bae714SAlexander Duyck } while (IS_TNODE(n)); 954939afb06SAlexander Duyck 95535c6edacSAlexander Duyck *tp = pn; 956d4a975e8SAlexander Duyck 957939afb06SAlexander Duyck return n; 95819baf839SRobert Olsson } 95919baf839SRobert Olsson 96002525368SAlexander Duyck /* Return the first fib alias matching TOS with 96102525368SAlexander Duyck * priority less than or equal to PRIO. 96202525368SAlexander Duyck */ 96379e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, 9640b65bd97SAlexander Duyck u8 tos, u32 prio, u32 tb_id) 96502525368SAlexander Duyck { 96602525368SAlexander Duyck struct fib_alias *fa; 96702525368SAlexander Duyck 96802525368SAlexander Duyck if (!fah) 96902525368SAlexander Duyck return NULL; 97002525368SAlexander Duyck 97156315f9eSAlexander Duyck hlist_for_each_entry(fa, fah, fa_list) { 97279e5ad2cSAlexander Duyck if (fa->fa_slen < slen) 97379e5ad2cSAlexander Duyck continue; 97479e5ad2cSAlexander Duyck if (fa->fa_slen != slen) 97579e5ad2cSAlexander Duyck break; 9760b65bd97SAlexander Duyck if (fa->tb_id > tb_id) 9770b65bd97SAlexander Duyck continue; 9780b65bd97SAlexander Duyck if (fa->tb_id != tb_id) 9790b65bd97SAlexander Duyck break; 98002525368SAlexander Duyck if (fa->fa_tos > tos) 98102525368SAlexander Duyck continue; 98202525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 98302525368SAlexander Duyck return fa; 98402525368SAlexander Duyck } 98502525368SAlexander Duyck 98602525368SAlexander Duyck return NULL; 98702525368SAlexander Duyck } 98802525368SAlexander Duyck 98935c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn) 99019baf839SRobert Olsson { 99188bae714SAlexander Duyck while (!IS_TRIE(tn)) 99288bae714SAlexander Duyck tn = resize(t, tn); 99319baf839SRobert Olsson } 99419baf839SRobert Olsson 99535c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp, 996d5d6487cSAlexander Duyck struct fib_alias *new, t_key key) 99719baf839SRobert Olsson { 99835c6edacSAlexander Duyck struct key_vector *n, *l; 999836a0123SAlexander Duyck 1000d5d6487cSAlexander Duyck l = leaf_new(key, new); 100179e5ad2cSAlexander Duyck if (!l) 10028d8e810cSAlexander Duyck goto noleaf; 1003d5d6487cSAlexander Duyck 1004d5d6487cSAlexander Duyck /* retrieve child from parent node */ 1005754baf8dSAlexander Duyck n = get_child(tp, get_index(key, tp)); 100619baf839SRobert Olsson 1007836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1008836a0123SAlexander Duyck * 100919baf839SRobert Olsson * Add a new tnode here 101019baf839SRobert Olsson * first tnode need some special handling 1011836a0123SAlexander Duyck * leaves us in position for handling as case 3 101219baf839SRobert Olsson */ 101319baf839SRobert Olsson if (n) { 101435c6edacSAlexander Duyck struct key_vector *tn; 1015f835e471SRobert Olsson 1016e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 10178d8e810cSAlexander Duyck if (!tn) 10188d8e810cSAlexander Duyck goto notnode; 101919baf839SRobert Olsson 1020836a0123SAlexander Duyck /* initialize routes out of node */ 1021836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1022836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 102319baf839SRobert Olsson 1024836a0123SAlexander Duyck /* start adding routes into the node */ 102588bae714SAlexander Duyck put_child_root(tp, key, tn); 1026836a0123SAlexander Duyck node_set_parent(n, tn); 102719baf839SRobert Olsson 1028836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1029e962f302SAlexander Duyck tp = tn; 103019baf839SRobert Olsson } 103191b9a277SOlof Johansson 1032836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1033836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 103488bae714SAlexander Duyck put_child_root(tp, key, l); 10357b85576dSJarek Poplawski trie_rebalance(t, tp); 1036d5d6487cSAlexander Duyck 1037d5d6487cSAlexander Duyck return 0; 10388d8e810cSAlexander Duyck notnode: 10398d8e810cSAlexander Duyck node_free(l); 10408d8e810cSAlexander Duyck noleaf: 10418d8e810cSAlexander Duyck return -ENOMEM; 1042d5d6487cSAlexander Duyck } 1043d5d6487cSAlexander Duyck 104435c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp, 104535c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *new, 1046d5d6487cSAlexander Duyck struct fib_alias *fa, t_key key) 1047d5d6487cSAlexander Duyck { 1048d5d6487cSAlexander Duyck if (!l) 1049d5d6487cSAlexander Duyck return fib_insert_node(t, tp, new, key); 1050d5d6487cSAlexander Duyck 1051d5d6487cSAlexander Duyck if (fa) { 1052d5d6487cSAlexander Duyck hlist_add_before_rcu(&new->fa_list, &fa->fa_list); 1053836a0123SAlexander Duyck } else { 1054d5d6487cSAlexander Duyck struct fib_alias *last; 1055d5d6487cSAlexander Duyck 1056d5d6487cSAlexander Duyck hlist_for_each_entry(last, &l->leaf, fa_list) { 1057d5d6487cSAlexander Duyck if (new->fa_slen < last->fa_slen) 1058d5d6487cSAlexander Duyck break; 10590b65bd97SAlexander Duyck if ((new->fa_slen == last->fa_slen) && 10600b65bd97SAlexander Duyck (new->tb_id > last->tb_id)) 10610b65bd97SAlexander Duyck break; 1062d5d6487cSAlexander Duyck fa = last; 1063836a0123SAlexander Duyck } 1064836a0123SAlexander Duyck 1065d5d6487cSAlexander Duyck if (fa) 1066d5d6487cSAlexander Duyck hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); 1067d5d6487cSAlexander Duyck else 1068d5d6487cSAlexander Duyck hlist_add_head_rcu(&new->fa_list, &l->leaf); 106919baf839SRobert Olsson } 107019baf839SRobert Olsson 1071d5d6487cSAlexander Duyck /* if we added to the tail node then we need to update slen */ 1072d5d6487cSAlexander Duyck if (l->slen < new->fa_slen) { 1073d5d6487cSAlexander Duyck l->slen = new->fa_slen; 1074d5d6487cSAlexander Duyck leaf_push_suffix(tp, l); 1075d5d6487cSAlexander Duyck } 1076d5d6487cSAlexander Duyck 1077d5d6487cSAlexander Duyck return 0; 1078d5d6487cSAlexander Duyck } 1079d5d6487cSAlexander Duyck 1080d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 108116c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 108219baf839SRobert Olsson { 108319baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 108419baf839SRobert Olsson struct fib_alias *fa, *new_fa; 108535c6edacSAlexander Duyck struct key_vector *l, *tp; 1086a2bb6d7dSRoopa Prabhu unsigned int nlflags = 0; 108719baf839SRobert Olsson struct fib_info *fi; 108879e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 108979e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 10904e902c57SThomas Graf u8 tos = cfg->fc_tos; 1091d4a975e8SAlexander Duyck u32 key; 109219baf839SRobert Olsson int err; 109319baf839SRobert Olsson 10945786ec60SAlexander Duyck if (plen > KEYLENGTH) 109519baf839SRobert Olsson return -EINVAL; 109619baf839SRobert Olsson 10974e902c57SThomas Graf key = ntohl(cfg->fc_dst); 109819baf839SRobert Olsson 10992dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 110019baf839SRobert Olsson 1101d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 110219baf839SRobert Olsson return -EINVAL; 110319baf839SRobert Olsson 11044e902c57SThomas Graf fi = fib_create_info(cfg); 11054e902c57SThomas Graf if (IS_ERR(fi)) { 11064e902c57SThomas Graf err = PTR_ERR(fi); 110719baf839SRobert Olsson goto err; 11084e902c57SThomas Graf } 110919baf839SRobert Olsson 1110d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 11110b65bd97SAlexander Duyck fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, 11120b65bd97SAlexander Duyck tb->tb_id) : NULL; 111319baf839SRobert Olsson 111419baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 111519baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 111619baf839SRobert Olsson * exists or to the node before which we will insert new one. 111719baf839SRobert Olsson * 111819baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 111956315f9eSAlexander Duyck * insert to the tail of the section matching the suffix length 112056315f9eSAlexander Duyck * of the new alias. 112119baf839SRobert Olsson */ 112219baf839SRobert Olsson 1123936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1124936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1125936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 112619baf839SRobert Olsson 112719baf839SRobert Olsson err = -EEXIST; 11284e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 112919baf839SRobert Olsson goto out; 113019baf839SRobert Olsson 1131936f6f8eSJulian Anastasov /* We have 2 goals: 1132936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1133936f6f8eSJulian Anastasov * duplicate routes 1134936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1135936f6f8eSJulian Anastasov */ 1136936f6f8eSJulian Anastasov fa_match = NULL; 1137936f6f8eSJulian Anastasov fa_first = fa; 113856315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 11390b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 11400b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 11410b65bd97SAlexander Duyck (fa->fa_tos != tos)) 1142936f6f8eSJulian Anastasov break; 1143936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1144936f6f8eSJulian Anastasov break; 1145936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1146936f6f8eSJulian Anastasov fa->fa_info == fi) { 1147936f6f8eSJulian Anastasov fa_match = fa; 1148936f6f8eSJulian Anastasov break; 1149936f6f8eSJulian Anastasov } 1150936f6f8eSJulian Anastasov } 1151936f6f8eSJulian Anastasov 11524e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 115319baf839SRobert Olsson struct fib_info *fi_drop; 115419baf839SRobert Olsson u8 state; 115519baf839SRobert Olsson 1156936f6f8eSJulian Anastasov fa = fa_first; 1157936f6f8eSJulian Anastasov if (fa_match) { 1158936f6f8eSJulian Anastasov if (fa == fa_match) 1159936f6f8eSJulian Anastasov err = 0; 11606725033fSJoonwoo Park goto out; 1161936f6f8eSJulian Anastasov } 11622373ce1cSRobert Olsson err = -ENOBUFS; 1163e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 116451456b29SIan Morris if (!new_fa) 11652373ce1cSRobert Olsson goto out; 116619baf839SRobert Olsson 116719baf839SRobert Olsson fi_drop = fa->fa_info; 11682373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 11692373ce1cSRobert Olsson new_fa->fa_info = fi; 11704e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 117119baf839SRobert Olsson state = fa->fa_state; 1172936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 11739b6ebad5SAlexander Duyck new_fa->fa_slen = fa->fa_slen; 1174d4e64c29SMichal Kubeček new_fa->tb_id = tb->tb_id; 11752392debcSJulian Anastasov new_fa->fa_default = -1; 117619baf839SRobert Olsson 1177ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, 11788e05fd71SScott Feldman new_fa->fa_tos, 11798e05fd71SScott Feldman cfg->fc_type, 1180f8f21471SScott Feldman cfg->fc_nlflags, 11818e05fd71SScott Feldman tb->tb_id); 11828e05fd71SScott Feldman if (err) { 1183ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 11848e05fd71SScott Feldman kmem_cache_free(fn_alias_kmem, new_fa); 11858e05fd71SScott Feldman goto out; 11868e05fd71SScott Feldman } 11878e05fd71SScott Feldman 118856315f9eSAlexander Duyck hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); 11898e05fd71SScott Feldman 11902373ce1cSRobert Olsson alias_free_mem_rcu(fa); 119119baf839SRobert Olsson 119219baf839SRobert Olsson fib_release_info(fi_drop); 119319baf839SRobert Olsson if (state & FA_S_ACCESSED) 11944ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1195b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1196b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 119719baf839SRobert Olsson 119819baf839SRobert Olsson goto succeeded; 119919baf839SRobert Olsson } 120019baf839SRobert Olsson /* Error if we find a perfect match which 120119baf839SRobert Olsson * uses the same scope, type, and nexthop 120219baf839SRobert Olsson * information. 120319baf839SRobert Olsson */ 1204936f6f8eSJulian Anastasov if (fa_match) 120519baf839SRobert Olsson goto out; 1206a07f5f50SStephen Hemminger 1207a2bb6d7dSRoopa Prabhu if (cfg->fc_nlflags & NLM_F_APPEND) 1208a2bb6d7dSRoopa Prabhu nlflags = NLM_F_APPEND; 1209a2bb6d7dSRoopa Prabhu else 1210936f6f8eSJulian Anastasov fa = fa_first; 121119baf839SRobert Olsson } 121219baf839SRobert Olsson err = -ENOENT; 12134e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 121419baf839SRobert Olsson goto out; 121519baf839SRobert Olsson 121619baf839SRobert Olsson err = -ENOBUFS; 1217e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 121851456b29SIan Morris if (!new_fa) 121919baf839SRobert Olsson goto out; 122019baf839SRobert Olsson 122119baf839SRobert Olsson new_fa->fa_info = fi; 122219baf839SRobert Olsson new_fa->fa_tos = tos; 12234e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 122419baf839SRobert Olsson new_fa->fa_state = 0; 122579e5ad2cSAlexander Duyck new_fa->fa_slen = slen; 12260ddcf43dSAlexander Duyck new_fa->tb_id = tb->tb_id; 12272392debcSJulian Anastasov new_fa->fa_default = -1; 122819baf839SRobert Olsson 12298e05fd71SScott Feldman /* (Optionally) offload fib entry to switch hardware. */ 1230ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, 1231ebb9a03aSJiri Pirko cfg->fc_nlflags, tb->tb_id); 12328e05fd71SScott Feldman if (err) { 1233ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 12348e05fd71SScott Feldman goto out_free_new_fa; 12358e05fd71SScott Feldman } 12368e05fd71SScott Feldman 12379b6ebad5SAlexander Duyck /* Insert new entry to the list. */ 1238d5d6487cSAlexander Duyck err = fib_insert_alias(t, tp, l, new_fa, fa, key); 1239d5d6487cSAlexander Duyck if (err) 12408e05fd71SScott Feldman goto out_sw_fib_del; 124119baf839SRobert Olsson 124221d8c49eSDavid S. Miller if (!plen) 124321d8c49eSDavid S. Miller tb->tb_num_default++; 124421d8c49eSDavid S. Miller 12454ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 12460ddcf43dSAlexander Duyck rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1247a2bb6d7dSRoopa Prabhu &cfg->fc_nlinfo, nlflags); 124819baf839SRobert Olsson succeeded: 124919baf839SRobert Olsson return 0; 1250f835e471SRobert Olsson 12518e05fd71SScott Feldman out_sw_fib_del: 1252ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); 1253f835e471SRobert Olsson out_free_new_fa: 1254f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 125519baf839SRobert Olsson out: 125619baf839SRobert Olsson fib_release_info(fi); 125791b9a277SOlof Johansson err: 125819baf839SRobert Olsson return err; 125919baf839SRobert Olsson } 126019baf839SRobert Olsson 126135c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n) 12629f9e636dSAlexander Duyck { 12639f9e636dSAlexander Duyck t_key prefix = n->key; 12649f9e636dSAlexander Duyck 12659f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 12669f9e636dSAlexander Duyck } 12679f9e636dSAlexander Duyck 1268345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 126922bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1270ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 127119baf839SRobert Olsson { 127219baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 12738274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 12748274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 12758274a97aSAlexander Duyck #endif 12769f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 127735c6edacSAlexander Duyck struct key_vector *n, *pn; 127879e5ad2cSAlexander Duyck struct fib_alias *fa; 127971e8b67dSAlexander Duyck unsigned long index; 12809f9e636dSAlexander Duyck t_key cindex; 128119baf839SRobert Olsson 1282f6d3c192SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp); 1283f6d3c192SDavid Ahern 128488bae714SAlexander Duyck pn = t->kv; 128588bae714SAlexander Duyck cindex = 0; 128688bae714SAlexander Duyck 128788bae714SAlexander Duyck n = get_child_rcu(pn, cindex); 128819baf839SRobert Olsson if (!n) 1289345e9b54SAlexander Duyck return -EAGAIN; 129019baf839SRobert Olsson 129119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 12928274a97aSAlexander Duyck this_cpu_inc(stats->gets); 129319baf839SRobert Olsson #endif 129419baf839SRobert Olsson 12959f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 12969f9e636dSAlexander Duyck for (;;) { 129788bae714SAlexander Duyck index = get_cindex(key, n); 12989f9e636dSAlexander Duyck 12999f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13009f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13019f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13029f9e636dSAlexander Duyck * is the difference between the key and this value. From 13039f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 130471e8b67dSAlexander Duyck * if (index >= (1ul << bits)) 13059f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1306b3832117SAlexander Duyck * else 1307b3832117SAlexander Duyck * we know the value is cindex 130871e8b67dSAlexander Duyck * 130971e8b67dSAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 131071e8b67dSAlexander Duyck * fact that we can only allocate a node with 32 bits if a 131171e8b67dSAlexander Duyck * long is greater than 32 bits. 13129f9e636dSAlexander Duyck */ 131371e8b67dSAlexander Duyck if (index >= (1ul << n->bits)) 13149f9e636dSAlexander Duyck break; 13159f9e636dSAlexander Duyck 13169f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13179f9e636dSAlexander Duyck if (IS_LEAF(n)) 1318a07f5f50SStephen Hemminger goto found; 13199f9e636dSAlexander Duyck 13209f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13219f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13229f9e636dSAlexander Duyck */ 13235405afd1SAlexander Duyck if (n->slen > n->pos) { 13249f9e636dSAlexander Duyck pn = n; 13259f9e636dSAlexander Duyck cindex = index; 132619baf839SRobert Olsson } 1327a07f5f50SStephen Hemminger 1328754baf8dSAlexander Duyck n = get_child_rcu(n, index); 13299f9e636dSAlexander Duyck if (unlikely(!n)) 13309f9e636dSAlexander Duyck goto backtrace; 13319f9e636dSAlexander Duyck } 133219baf839SRobert Olsson 13339f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13349f9e636dSAlexander Duyck for (;;) { 13359f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 133635c6edacSAlexander Duyck struct key_vector __rcu **cptr = n->tnode; 133719baf839SRobert Olsson 13389f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13399f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13409f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13419f9e636dSAlexander Duyck */ 13425405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13439f9e636dSAlexander Duyck goto backtrace; 134419baf839SRobert Olsson 13459f9e636dSAlexander Duyck /* exit out and process leaf */ 13469f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13479f9e636dSAlexander Duyck break; 134819baf839SRobert Olsson 13499f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13509f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 13519f9e636dSAlexander Duyck * we started this traversal anyway 13529f9e636dSAlexander Duyck */ 13539f9e636dSAlexander Duyck 13549f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 13559f9e636dSAlexander Duyck backtrace: 135619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13579f9e636dSAlexander Duyck if (!n) 13588274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 135919baf839SRobert Olsson #endif 13609f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 13619f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 13629f9e636dSAlexander Duyck * up one level to see if there are any more bits to 13639f9e636dSAlexander Duyck * be stripped there. 136419baf839SRobert Olsson */ 13659f9e636dSAlexander Duyck while (!cindex) { 13669f9e636dSAlexander Duyck t_key pkey = pn->key; 136719baf839SRobert Olsson 136888bae714SAlexander Duyck /* If we don't have a parent then there is 136988bae714SAlexander Duyck * nothing for us to do as we do not have any 137088bae714SAlexander Duyck * further nodes to parse. 137188bae714SAlexander Duyck */ 137288bae714SAlexander Duyck if (IS_TRIE(pn)) 1373345e9b54SAlexander Duyck return -EAGAIN; 137419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13758274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 137619baf839SRobert Olsson #endif 13779f9e636dSAlexander Duyck /* Get Child's index */ 137888bae714SAlexander Duyck pn = node_parent_rcu(pn); 13799f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 13809f9e636dSAlexander Duyck } 13819f9e636dSAlexander Duyck 13829f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 13839f9e636dSAlexander Duyck cindex &= cindex - 1; 13849f9e636dSAlexander Duyck 13859f9e636dSAlexander Duyck /* grab pointer for next child node */ 138641b489fdSAlexander Duyck cptr = &pn->tnode[cindex]; 138719baf839SRobert Olsson } 138819baf839SRobert Olsson } 13899f9e636dSAlexander Duyck 139019baf839SRobert Olsson found: 139171e8b67dSAlexander Duyck /* this line carries forward the xor from earlier in the function */ 139271e8b67dSAlexander Duyck index = key ^ n->key; 139371e8b67dSAlexander Duyck 13949f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 139579e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 1396345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1397345e9b54SAlexander Duyck int nhsel, err; 1398345e9b54SAlexander Duyck 139971e8b67dSAlexander Duyck if ((index >= (1ul << fa->fa_slen)) && 140079e5ad2cSAlexander Duyck ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH))) 14019b6ebad5SAlexander Duyck continue; 1402345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1403345e9b54SAlexander Duyck continue; 1404345e9b54SAlexander Duyck if (fi->fib_dead) 1405345e9b54SAlexander Duyck continue; 1406345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1407345e9b54SAlexander Duyck continue; 1408345e9b54SAlexander Duyck fib_alias_accessed(fa); 1409345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1410345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1411345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1412345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1413345e9b54SAlexander Duyck #endif 1414345e9b54SAlexander Duyck return err; 1415345e9b54SAlexander Duyck } 1416345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1417345e9b54SAlexander Duyck continue; 1418345e9b54SAlexander Duyck for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1419345e9b54SAlexander Duyck const struct fib_nh *nh = &fi->fib_nh[nhsel]; 14200eeb075fSAndy Gospodarek struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); 1421345e9b54SAlexander Duyck 1422345e9b54SAlexander Duyck if (nh->nh_flags & RTNH_F_DEAD) 1423345e9b54SAlexander Duyck continue; 14240eeb075fSAndy Gospodarek if (in_dev && 14250eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 14260eeb075fSAndy Gospodarek nh->nh_flags & RTNH_F_LINKDOWN && 14270eeb075fSAndy Gospodarek !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 14280eeb075fSAndy Gospodarek continue; 1429613d09b3SDavid Ahern if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) { 1430613d09b3SDavid Ahern if (flp->flowi4_oif && 1431613d09b3SDavid Ahern flp->flowi4_oif != nh->nh_oif) 1432345e9b54SAlexander Duyck continue; 1433613d09b3SDavid Ahern } 1434345e9b54SAlexander Duyck 1435345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 1436345e9b54SAlexander Duyck atomic_inc(&fi->fib_clntref); 1437345e9b54SAlexander Duyck 14389b6ebad5SAlexander Duyck res->prefixlen = KEYLENGTH - fa->fa_slen; 1439345e9b54SAlexander Duyck res->nh_sel = nhsel; 1440345e9b54SAlexander Duyck res->type = fa->fa_type; 1441345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1442345e9b54SAlexander Duyck res->fi = fi; 1443345e9b54SAlexander Duyck res->table = tb; 144479e5ad2cSAlexander Duyck res->fa_head = &n->leaf; 1445345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1446345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1447345e9b54SAlexander Duyck #endif 1448f6d3c192SDavid Ahern trace_fib_table_lookup_nh(nh); 1449f6d3c192SDavid Ahern 1450345e9b54SAlexander Duyck return err; 1451345e9b54SAlexander Duyck } 1452345e9b54SAlexander Duyck } 1453345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1454345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1455345e9b54SAlexander Duyck #endif 14569f9e636dSAlexander Duyck goto backtrace; 145719baf839SRobert Olsson } 14586fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 145919baf839SRobert Olsson 146035c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp, 146135c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *old) 1462d5d6487cSAlexander Duyck { 1463d5d6487cSAlexander Duyck /* record the location of the previous list_info entry */ 1464d5d6487cSAlexander Duyck struct hlist_node **pprev = old->fa_list.pprev; 1465d5d6487cSAlexander Duyck struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); 1466d5d6487cSAlexander Duyck 1467d5d6487cSAlexander Duyck /* remove the fib_alias from the list */ 1468d5d6487cSAlexander Duyck hlist_del_rcu(&old->fa_list); 1469d5d6487cSAlexander Duyck 1470d5d6487cSAlexander Duyck /* if we emptied the list this leaf will be freed and we can sort 1471d5d6487cSAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 1472d562f1f8SRobert Olsson */ 1473d5d6487cSAlexander Duyck if (hlist_empty(&l->leaf)) { 147488bae714SAlexander Duyck put_child_root(tp, l->key, NULL); 1475d5d6487cSAlexander Duyck node_free(l); 1476d5d6487cSAlexander Duyck trie_rebalance(t, tp); 1477d5d6487cSAlexander Duyck return; 1478d5d6487cSAlexander Duyck } 1479d5d6487cSAlexander Duyck 1480d5d6487cSAlexander Duyck /* only access fa if it is pointing at the last valid hlist_node */ 1481d5d6487cSAlexander Duyck if (*pprev) 1482d5d6487cSAlexander Duyck return; 1483d5d6487cSAlexander Duyck 1484d5d6487cSAlexander Duyck /* update the trie with the latest suffix length */ 1485d5d6487cSAlexander Duyck l->slen = fa->fa_slen; 1486d5d6487cSAlexander Duyck leaf_pull_suffix(tp, l); 1487d5d6487cSAlexander Duyck } 1488d5d6487cSAlexander Duyck 1489d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 149016c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) 149119baf839SRobert Olsson { 149219baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 149319baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 149435c6edacSAlexander Duyck struct key_vector *l, *tp; 149579e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 149679e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 1497d4a975e8SAlexander Duyck u8 tos = cfg->fc_tos; 1498d4a975e8SAlexander Duyck u32 key; 149991b9a277SOlof Johansson 150079e5ad2cSAlexander Duyck if (plen > KEYLENGTH) 150119baf839SRobert Olsson return -EINVAL; 150219baf839SRobert Olsson 15034e902c57SThomas Graf key = ntohl(cfg->fc_dst); 150419baf839SRobert Olsson 1505d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 150619baf839SRobert Olsson return -EINVAL; 150719baf839SRobert Olsson 1508d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 150919baf839SRobert Olsson if (!l) 151019baf839SRobert Olsson return -ESRCH; 151119baf839SRobert Olsson 15120b65bd97SAlexander Duyck fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); 151319baf839SRobert Olsson if (!fa) 151419baf839SRobert Olsson return -ESRCH; 151519baf839SRobert Olsson 15160c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 151719baf839SRobert Olsson 151819baf839SRobert Olsson fa_to_delete = NULL; 151956315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 152019baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 152119baf839SRobert Olsson 15220b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 15230b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 15240b65bd97SAlexander Duyck (fa->fa_tos != tos)) 152519baf839SRobert Olsson break; 152619baf839SRobert Olsson 15274e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15284e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 152937e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 153074cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 153174cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15324e902c57SThomas Graf (!cfg->fc_protocol || 15334e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15344e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 153519baf839SRobert Olsson fa_to_delete = fa; 153619baf839SRobert Olsson break; 153719baf839SRobert Olsson } 153819baf839SRobert Olsson } 153919baf839SRobert Olsson 154091b9a277SOlof Johansson if (!fa_to_delete) 154191b9a277SOlof Johansson return -ESRCH; 154219baf839SRobert Olsson 1543ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, 15448e05fd71SScott Feldman cfg->fc_type, tb->tb_id); 15458e05fd71SScott Feldman 1546d5d6487cSAlexander Duyck rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1547b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 154819baf839SRobert Olsson 154921d8c49eSDavid S. Miller if (!plen) 155021d8c49eSDavid S. Miller tb->tb_num_default--; 155121d8c49eSDavid S. Miller 1552d5d6487cSAlexander Duyck fib_remove_alias(t, tp, l, fa_to_delete); 15537289e6ddSAlexander Duyck 1554d5d6487cSAlexander Duyck if (fa_to_delete->fa_state & FA_S_ACCESSED) 15554ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 155619baf839SRobert Olsson 1557d5d6487cSAlexander Duyck fib_release_info(fa_to_delete->fa_info); 1558d5d6487cSAlexander Duyck alias_free_mem_rcu(fa_to_delete); 155919baf839SRobert Olsson return 0; 156019baf839SRobert Olsson } 156119baf839SRobert Olsson 15628be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */ 156335c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) 156419baf839SRobert Olsson { 156535c6edacSAlexander Duyck struct key_vector *pn, *n = *tn; 15668be33e95SAlexander Duyck unsigned long cindex; 156719baf839SRobert Olsson 15688be33e95SAlexander Duyck /* this loop is meant to try and find the key in the trie */ 156988bae714SAlexander Duyck do { 157088bae714SAlexander Duyck /* record parent and next child index */ 157188bae714SAlexander Duyck pn = n; 15723ec320ddSAlexander Duyck cindex = key ? get_index(key, pn) : 0; 157388bae714SAlexander Duyck 157488bae714SAlexander Duyck if (cindex >> pn->bits) 157588bae714SAlexander Duyck break; 157688bae714SAlexander Duyck 157788bae714SAlexander Duyck /* descend into the next child */ 157888bae714SAlexander Duyck n = get_child_rcu(pn, cindex++); 157988bae714SAlexander Duyck if (!n) 158088bae714SAlexander Duyck break; 15818be33e95SAlexander Duyck 15828be33e95SAlexander Duyck /* guarantee forward progress on the keys */ 15838be33e95SAlexander Duyck if (IS_LEAF(n) && (n->key >= key)) 15848be33e95SAlexander Duyck goto found; 158588bae714SAlexander Duyck } while (IS_TNODE(n)); 15868be33e95SAlexander Duyck 15878be33e95SAlexander Duyck /* this loop will search for the next leaf with a greater key */ 158888bae714SAlexander Duyck while (!IS_TRIE(pn)) { 15898be33e95SAlexander Duyck /* if we exhausted the parent node we will need to climb */ 15908be33e95SAlexander Duyck if (cindex >= (1ul << pn->bits)) { 15918be33e95SAlexander Duyck t_key pkey = pn->key; 15928be33e95SAlexander Duyck 15938be33e95SAlexander Duyck pn = node_parent_rcu(pn); 15948be33e95SAlexander Duyck cindex = get_index(pkey, pn) + 1; 15958be33e95SAlexander Duyck continue; 15968be33e95SAlexander Duyck } 15978be33e95SAlexander Duyck 15988be33e95SAlexander Duyck /* grab the next available node */ 1599754baf8dSAlexander Duyck n = get_child_rcu(pn, cindex++); 16008be33e95SAlexander Duyck if (!n) 160191b9a277SOlof Johansson continue; 160219baf839SRobert Olsson 16038be33e95SAlexander Duyck /* no need to compare keys since we bumped the index */ 16048be33e95SAlexander Duyck if (IS_LEAF(n)) 16058be33e95SAlexander Duyck goto found; 160682cfbb00SStephen Hemminger 160782cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 16088be33e95SAlexander Duyck pn = n; 16098be33e95SAlexander Duyck cindex = 0; 161019baf839SRobert Olsson } 161182cfbb00SStephen Hemminger 16128be33e95SAlexander Duyck *tn = pn; 161382cfbb00SStephen Hemminger return NULL; /* Root of trie */ 16148be33e95SAlexander Duyck found: 16158be33e95SAlexander Duyck /* if we are at the limit for keys just return NULL for the tnode */ 161688bae714SAlexander Duyck *tn = pn; 1617adaf9816SAlexander Duyck return n; 161882cfbb00SStephen Hemminger } 161982cfbb00SStephen Hemminger 16200ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb) 16210ddcf43dSAlexander Duyck { 16220ddcf43dSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 16230ddcf43dSAlexander Duyck struct key_vector *pn = t->kv; 16240ddcf43dSAlexander Duyck unsigned long cindex = 1; 16250ddcf43dSAlexander Duyck struct hlist_node *tmp; 16260ddcf43dSAlexander Duyck struct fib_alias *fa; 16270ddcf43dSAlexander Duyck 16280ddcf43dSAlexander Duyck /* walk trie in reverse order and free everything */ 16290ddcf43dSAlexander Duyck for (;;) { 16300ddcf43dSAlexander Duyck struct key_vector *n; 16310ddcf43dSAlexander Duyck 16320ddcf43dSAlexander Duyck if (!(cindex--)) { 16330ddcf43dSAlexander Duyck t_key pkey = pn->key; 16340ddcf43dSAlexander Duyck 16350ddcf43dSAlexander Duyck if (IS_TRIE(pn)) 16360ddcf43dSAlexander Duyck break; 16370ddcf43dSAlexander Duyck 16380ddcf43dSAlexander Duyck n = pn; 16390ddcf43dSAlexander Duyck pn = node_parent(pn); 16400ddcf43dSAlexander Duyck 16410ddcf43dSAlexander Duyck /* drop emptied tnode */ 16420ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16430ddcf43dSAlexander Duyck node_free(n); 16440ddcf43dSAlexander Duyck 16450ddcf43dSAlexander Duyck cindex = get_index(pkey, pn); 16460ddcf43dSAlexander Duyck 16470ddcf43dSAlexander Duyck continue; 16480ddcf43dSAlexander Duyck } 16490ddcf43dSAlexander Duyck 16500ddcf43dSAlexander Duyck /* grab the next available node */ 16510ddcf43dSAlexander Duyck n = get_child(pn, cindex); 16520ddcf43dSAlexander Duyck if (!n) 16530ddcf43dSAlexander Duyck continue; 16540ddcf43dSAlexander Duyck 16550ddcf43dSAlexander Duyck if (IS_TNODE(n)) { 16560ddcf43dSAlexander Duyck /* record pn and cindex for leaf walking */ 16570ddcf43dSAlexander Duyck pn = n; 16580ddcf43dSAlexander Duyck cindex = 1ul << n->bits; 16590ddcf43dSAlexander Duyck 16600ddcf43dSAlexander Duyck continue; 16610ddcf43dSAlexander Duyck } 16620ddcf43dSAlexander Duyck 16630ddcf43dSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 16640ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 16650ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 16660ddcf43dSAlexander Duyck } 16670ddcf43dSAlexander Duyck 16680ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16690ddcf43dSAlexander Duyck node_free(n); 16700ddcf43dSAlexander Duyck } 16710ddcf43dSAlexander Duyck 16720ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 16730ddcf43dSAlexander Duyck free_percpu(t->stats); 16740ddcf43dSAlexander Duyck #endif 16750ddcf43dSAlexander Duyck kfree(tb); 16760ddcf43dSAlexander Duyck } 16770ddcf43dSAlexander Duyck 16780ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) 16790ddcf43dSAlexander Duyck { 16800ddcf43dSAlexander Duyck struct trie *ot = (struct trie *)oldtb->tb_data; 16810ddcf43dSAlexander Duyck struct key_vector *l, *tp = ot->kv; 16820ddcf43dSAlexander Duyck struct fib_table *local_tb; 16830ddcf43dSAlexander Duyck struct fib_alias *fa; 16840ddcf43dSAlexander Duyck struct trie *lt; 16850ddcf43dSAlexander Duyck t_key key = 0; 16860ddcf43dSAlexander Duyck 16870ddcf43dSAlexander Duyck if (oldtb->tb_data == oldtb->__data) 16880ddcf43dSAlexander Duyck return oldtb; 16890ddcf43dSAlexander Duyck 16900ddcf43dSAlexander Duyck local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); 16910ddcf43dSAlexander Duyck if (!local_tb) 16920ddcf43dSAlexander Duyck return NULL; 16930ddcf43dSAlexander Duyck 16940ddcf43dSAlexander Duyck lt = (struct trie *)local_tb->tb_data; 16950ddcf43dSAlexander Duyck 16960ddcf43dSAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 16970ddcf43dSAlexander Duyck struct key_vector *local_l = NULL, *local_tp; 16980ddcf43dSAlexander Duyck 16990ddcf43dSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 17000ddcf43dSAlexander Duyck struct fib_alias *new_fa; 17010ddcf43dSAlexander Duyck 17020ddcf43dSAlexander Duyck if (local_tb->tb_id != fa->tb_id) 17030ddcf43dSAlexander Duyck continue; 17040ddcf43dSAlexander Duyck 17050ddcf43dSAlexander Duyck /* clone fa for new local table */ 17060ddcf43dSAlexander Duyck new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 17070ddcf43dSAlexander Duyck if (!new_fa) 17080ddcf43dSAlexander Duyck goto out; 17090ddcf43dSAlexander Duyck 17100ddcf43dSAlexander Duyck memcpy(new_fa, fa, sizeof(*fa)); 17110ddcf43dSAlexander Duyck 17120ddcf43dSAlexander Duyck /* insert clone into table */ 17130ddcf43dSAlexander Duyck if (!local_l) 17140ddcf43dSAlexander Duyck local_l = fib_find_node(lt, &local_tp, l->key); 17150ddcf43dSAlexander Duyck 17160ddcf43dSAlexander Duyck if (fib_insert_alias(lt, local_tp, local_l, new_fa, 17170ddcf43dSAlexander Duyck NULL, l->key)) 17180ddcf43dSAlexander Duyck goto out; 17190ddcf43dSAlexander Duyck } 17200ddcf43dSAlexander Duyck 17210ddcf43dSAlexander Duyck /* stop loop if key wrapped back to 0 */ 17220ddcf43dSAlexander Duyck key = l->key + 1; 17230ddcf43dSAlexander Duyck if (key < l->key) 17240ddcf43dSAlexander Duyck break; 17250ddcf43dSAlexander Duyck } 17260ddcf43dSAlexander Duyck 17270ddcf43dSAlexander Duyck return local_tb; 17280ddcf43dSAlexander Duyck out: 17290ddcf43dSAlexander Duyck fib_trie_free(local_tb); 17300ddcf43dSAlexander Duyck 17310ddcf43dSAlexander Duyck return NULL; 17320ddcf43dSAlexander Duyck } 17330ddcf43dSAlexander Duyck 1734104616e7SScott Feldman /* Caller must hold RTNL */ 1735104616e7SScott Feldman void fib_table_flush_external(struct fib_table *tb) 1736104616e7SScott Feldman { 1737104616e7SScott Feldman struct trie *t = (struct trie *)tb->tb_data; 173888bae714SAlexander Duyck struct key_vector *pn = t->kv; 173988bae714SAlexander Duyck unsigned long cindex = 1; 174088bae714SAlexander Duyck struct hlist_node *tmp; 1741104616e7SScott Feldman struct fib_alias *fa; 1742104616e7SScott Feldman 1743104616e7SScott Feldman /* walk trie in reverse order */ 174488bae714SAlexander Duyck for (;;) { 17450ddcf43dSAlexander Duyck unsigned char slen = 0; 174688bae714SAlexander Duyck struct key_vector *n; 174788bae714SAlexander Duyck 174888bae714SAlexander Duyck if (!(cindex--)) { 1749104616e7SScott Feldman t_key pkey = pn->key; 1750104616e7SScott Feldman 175188bae714SAlexander Duyck /* cannot resize the trie vector */ 175288bae714SAlexander Duyck if (IS_TRIE(pn)) 175388bae714SAlexander Duyck break; 1754104616e7SScott Feldman 17550ddcf43dSAlexander Duyck /* resize completed node */ 17560ddcf43dSAlexander Duyck pn = resize(t, pn); 1757104616e7SScott Feldman cindex = get_index(pkey, pn); 175888bae714SAlexander Duyck 175988bae714SAlexander Duyck continue; 1760104616e7SScott Feldman } 1761104616e7SScott Feldman 1762104616e7SScott Feldman /* grab the next available node */ 1763754baf8dSAlexander Duyck n = get_child(pn, cindex); 176488bae714SAlexander Duyck if (!n) 176588bae714SAlexander Duyck continue; 176688bae714SAlexander Duyck 176788bae714SAlexander Duyck if (IS_TNODE(n)) { 176888bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 176988bae714SAlexander Duyck pn = n; 177088bae714SAlexander Duyck cindex = 1ul << n->bits; 177188bae714SAlexander Duyck 177288bae714SAlexander Duyck continue; 1773104616e7SScott Feldman } 1774104616e7SScott Feldman 177588bae714SAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 1776104616e7SScott Feldman struct fib_info *fi = fa->fa_info; 1777104616e7SScott Feldman 17780ddcf43dSAlexander Duyck /* if alias was cloned to local then we just 17790ddcf43dSAlexander Duyck * need to remove the local copy from main 17800ddcf43dSAlexander Duyck */ 17810ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 17820ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 17830ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 17840ddcf43dSAlexander Duyck continue; 17850ddcf43dSAlexander Duyck } 17860ddcf43dSAlexander Duyck 17870ddcf43dSAlexander Duyck /* record local slen */ 17880ddcf43dSAlexander Duyck slen = fa->fa_slen; 17890ddcf43dSAlexander Duyck 1790eea39946SRoopa Prabhu if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) 179172be7260SAlexander Duyck continue; 179272be7260SAlexander Duyck 1793ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1794ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1795ebb9a03aSJiri Pirko tb->tb_id); 1796104616e7SScott Feldman } 17970ddcf43dSAlexander Duyck 17980ddcf43dSAlexander Duyck /* update leaf slen */ 17990ddcf43dSAlexander Duyck n->slen = slen; 18000ddcf43dSAlexander Duyck 18010ddcf43dSAlexander Duyck if (hlist_empty(&n->leaf)) { 18020ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 18030ddcf43dSAlexander Duyck node_free(n); 18040ddcf43dSAlexander Duyck } 180588bae714SAlexander Duyck } 1806104616e7SScott Feldman } 1807104616e7SScott Feldman 18088be33e95SAlexander Duyck /* Caller must hold RTNL. */ 180916c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb) 181019baf839SRobert Olsson { 181119baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 181288bae714SAlexander Duyck struct key_vector *pn = t->kv; 181388bae714SAlexander Duyck unsigned long cindex = 1; 18147289e6ddSAlexander Duyck struct hlist_node *tmp; 18157289e6ddSAlexander Duyck struct fib_alias *fa; 181682cfbb00SStephen Hemminger int found = 0; 181719baf839SRobert Olsson 18187289e6ddSAlexander Duyck /* walk trie in reverse order */ 181988bae714SAlexander Duyck for (;;) { 182088bae714SAlexander Duyck unsigned char slen = 0; 182188bae714SAlexander Duyck struct key_vector *n; 182288bae714SAlexander Duyck 182388bae714SAlexander Duyck if (!(cindex--)) { 18247289e6ddSAlexander Duyck t_key pkey = pn->key; 18257289e6ddSAlexander Duyck 182688bae714SAlexander Duyck /* cannot resize the trie vector */ 182788bae714SAlexander Duyck if (IS_TRIE(pn)) 182888bae714SAlexander Duyck break; 18297289e6ddSAlexander Duyck 18307289e6ddSAlexander Duyck /* resize completed node */ 183188bae714SAlexander Duyck pn = resize(t, pn); 18327289e6ddSAlexander Duyck cindex = get_index(pkey, pn); 183388bae714SAlexander Duyck 183488bae714SAlexander Duyck continue; 183564c62723SAlexander Duyck } 183664c62723SAlexander Duyck 18377289e6ddSAlexander Duyck /* grab the next available node */ 1838754baf8dSAlexander Duyck n = get_child(pn, cindex); 183988bae714SAlexander Duyck if (!n) 184088bae714SAlexander Duyck continue; 184119baf839SRobert Olsson 184288bae714SAlexander Duyck if (IS_TNODE(n)) { 184388bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 184488bae714SAlexander Duyck pn = n; 184588bae714SAlexander Duyck cindex = 1ul << n->bits; 184688bae714SAlexander Duyck 184788bae714SAlexander Duyck continue; 184888bae714SAlexander Duyck } 18497289e6ddSAlexander Duyck 18507289e6ddSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 18517289e6ddSAlexander Duyck struct fib_info *fi = fa->fa_info; 18527289e6ddSAlexander Duyck 185388bae714SAlexander Duyck if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { 185488bae714SAlexander Duyck slen = fa->fa_slen; 185588bae714SAlexander Duyck continue; 185688bae714SAlexander Duyck } 185788bae714SAlexander Duyck 1858ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1859ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1860ebb9a03aSJiri Pirko tb->tb_id); 18617289e6ddSAlexander Duyck hlist_del_rcu(&fa->fa_list); 18627289e6ddSAlexander Duyck fib_release_info(fa->fa_info); 18637289e6ddSAlexander Duyck alias_free_mem_rcu(fa); 18647289e6ddSAlexander Duyck found++; 18657289e6ddSAlexander Duyck } 18667289e6ddSAlexander Duyck 18677289e6ddSAlexander Duyck /* update leaf slen */ 18687289e6ddSAlexander Duyck n->slen = slen; 18697289e6ddSAlexander Duyck 18707289e6ddSAlexander Duyck if (hlist_empty(&n->leaf)) { 187188bae714SAlexander Duyck put_child_root(pn, n->key, NULL); 18727289e6ddSAlexander Duyck node_free(n); 18737289e6ddSAlexander Duyck } 187488bae714SAlexander Duyck } 18757289e6ddSAlexander Duyck 18760c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 187719baf839SRobert Olsson return found; 187819baf839SRobert Olsson } 187919baf839SRobert Olsson 1880a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head) 18814aa2c466SPavel Emelyanov { 1882a7e53531SAlexander Duyck struct fib_table *tb = container_of(head, struct fib_table, rcu); 18838274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 18848274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 18858274a97aSAlexander Duyck 18860ddcf43dSAlexander Duyck if (tb->tb_data == tb->__data) 18878274a97aSAlexander Duyck free_percpu(t->stats); 18888274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 18894aa2c466SPavel Emelyanov kfree(tb); 18904aa2c466SPavel Emelyanov } 18914aa2c466SPavel Emelyanov 1892a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb) 1893a7e53531SAlexander Duyck { 1894a7e53531SAlexander Duyck call_rcu(&tb->rcu, __trie_free_rcu); 1895a7e53531SAlexander Duyck } 1896a7e53531SAlexander Duyck 189735c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 189819baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 189919baf839SRobert Olsson { 190079e5ad2cSAlexander Duyck __be32 xkey = htonl(l->key); 190119baf839SRobert Olsson struct fib_alias *fa; 190279e5ad2cSAlexander Duyck int i, s_i; 190319baf839SRobert Olsson 190479e5ad2cSAlexander Duyck s_i = cb->args[4]; 190519baf839SRobert Olsson i = 0; 190619baf839SRobert Olsson 19072373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 190879e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 190919baf839SRobert Olsson if (i < s_i) { 191019baf839SRobert Olsson i++; 191119baf839SRobert Olsson continue; 191219baf839SRobert Olsson } 191319baf839SRobert Olsson 19140ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 19150ddcf43dSAlexander Duyck i++; 19160ddcf43dSAlexander Duyck continue; 19170ddcf43dSAlexander Duyck } 19180ddcf43dSAlexander Duyck 191915e47304SEric W. Biederman if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 192019baf839SRobert Olsson cb->nlh->nlmsg_seq, 192119baf839SRobert Olsson RTM_NEWROUTE, 192219baf839SRobert Olsson tb->tb_id, 192319baf839SRobert Olsson fa->fa_type, 1924be403ea1SThomas Graf xkey, 19259b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 192619baf839SRobert Olsson fa->fa_tos, 192764347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 192871d67e66SStephen Hemminger cb->args[4] = i; 192919baf839SRobert Olsson return -1; 193019baf839SRobert Olsson } 1931a88ee229SStephen Hemminger i++; 193219baf839SRobert Olsson } 1933a88ee229SStephen Hemminger 193471d67e66SStephen Hemminger cb->args[4] = i; 193519baf839SRobert Olsson return skb->len; 193619baf839SRobert Olsson } 193719baf839SRobert Olsson 1938a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */ 193916c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1940a07f5f50SStephen Hemminger struct netlink_callback *cb) 194119baf839SRobert Olsson { 194219baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 194388bae714SAlexander Duyck struct key_vector *l, *tp = t->kv; 1944d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1945d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1946d5ce8a0eSStephen Hemminger */ 19478be33e95SAlexander Duyck int count = cb->args[2]; 19488be33e95SAlexander Duyck t_key key = cb->args[3]; 1949a88ee229SStephen Hemminger 19508be33e95SAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 1951a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 19528be33e95SAlexander Duyck cb->args[3] = key; 19538be33e95SAlexander Duyck cb->args[2] = count; 195419baf839SRobert Olsson return -1; 195519baf839SRobert Olsson } 1956d5ce8a0eSStephen Hemminger 195771d67e66SStephen Hemminger ++count; 19588be33e95SAlexander Duyck key = l->key + 1; 19598be33e95SAlexander Duyck 196071d67e66SStephen Hemminger memset(&cb->args[4], 0, 196171d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 19628be33e95SAlexander Duyck 19638be33e95SAlexander Duyck /* stop loop if key wrapped back to 0 */ 19648be33e95SAlexander Duyck if (key < l->key) 19658be33e95SAlexander Duyck break; 1966a88ee229SStephen Hemminger } 19678be33e95SAlexander Duyck 19688be33e95SAlexander Duyck cb->args[3] = key; 19698be33e95SAlexander Duyck cb->args[2] = count; 19708be33e95SAlexander Duyck 1971a88ee229SStephen Hemminger return skb->len; 1972a88ee229SStephen Hemminger } 197319baf839SRobert Olsson 19745348ba85SDavid S. Miller void __init fib_trie_init(void) 19757f9b8052SStephen Hemminger { 1976a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1977a07f5f50SStephen Hemminger sizeof(struct fib_alias), 1978bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 1979bc3c8c1eSStephen Hemminger 1980bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 198141b489fdSAlexander Duyck LEAF_SIZE, 1982bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 19837f9b8052SStephen Hemminger } 198419baf839SRobert Olsson 19850ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) 198619baf839SRobert Olsson { 198719baf839SRobert Olsson struct fib_table *tb; 198819baf839SRobert Olsson struct trie *t; 19890ddcf43dSAlexander Duyck size_t sz = sizeof(*tb); 199019baf839SRobert Olsson 19910ddcf43dSAlexander Duyck if (!alias) 19920ddcf43dSAlexander Duyck sz += sizeof(struct trie); 19930ddcf43dSAlexander Duyck 19940ddcf43dSAlexander Duyck tb = kzalloc(sz, GFP_KERNEL); 199551456b29SIan Morris if (!tb) 199619baf839SRobert Olsson return NULL; 199719baf839SRobert Olsson 199819baf839SRobert Olsson tb->tb_id = id; 199921d8c49eSDavid S. Miller tb->tb_num_default = 0; 20000ddcf43dSAlexander Duyck tb->tb_data = (alias ? alias->__data : tb->__data); 20010ddcf43dSAlexander Duyck 20020ddcf43dSAlexander Duyck if (alias) 20030ddcf43dSAlexander Duyck return tb; 200419baf839SRobert Olsson 200519baf839SRobert Olsson t = (struct trie *) tb->tb_data; 200688bae714SAlexander Duyck t->kv[0].pos = KEYLENGTH; 200788bae714SAlexander Duyck t->kv[0].slen = KEYLENGTH; 20088274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 20098274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 20108274a97aSAlexander Duyck if (!t->stats) { 20118274a97aSAlexander Duyck kfree(tb); 20128274a97aSAlexander Duyck tb = NULL; 20138274a97aSAlexander Duyck } 20148274a97aSAlexander Duyck #endif 201519baf839SRobert Olsson 201619baf839SRobert Olsson return tb; 201719baf839SRobert Olsson } 201819baf839SRobert Olsson 2019cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2020cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2021cb7b593cSStephen Hemminger struct fib_trie_iter { 20221c340b2fSDenis V. Lunev struct seq_net_private p; 20233d3b2d25SStephen Hemminger struct fib_table *tb; 202435c6edacSAlexander Duyck struct key_vector *tnode; 2025a034ee3cSEric Dumazet unsigned int index; 2026a034ee3cSEric Dumazet unsigned int depth; 2027cb7b593cSStephen Hemminger }; 202819baf839SRobert Olsson 202935c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) 203019baf839SRobert Olsson { 203198293e8dSAlexander Duyck unsigned long cindex = iter->index; 203288bae714SAlexander Duyck struct key_vector *pn = iter->tnode; 203388bae714SAlexander Duyck t_key pkey; 20346640e697SEric W. Biederman 2035cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2036cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 203719baf839SRobert Olsson 203888bae714SAlexander Duyck while (!IS_TRIE(pn)) { 203988bae714SAlexander Duyck while (cindex < child_length(pn)) { 204088bae714SAlexander Duyck struct key_vector *n = get_child_rcu(pn, cindex++); 204188bae714SAlexander Duyck 204288bae714SAlexander Duyck if (!n) 204388bae714SAlexander Duyck continue; 204488bae714SAlexander Duyck 204519baf839SRobert Olsson if (IS_LEAF(n)) { 204688bae714SAlexander Duyck iter->tnode = pn; 204788bae714SAlexander Duyck iter->index = cindex; 204891b9a277SOlof Johansson } else { 2049cb7b593cSStephen Hemminger /* push down one level */ 2050adaf9816SAlexander Duyck iter->tnode = n; 2051cb7b593cSStephen Hemminger iter->index = 0; 2052cb7b593cSStephen Hemminger ++iter->depth; 205319baf839SRobert Olsson } 205488bae714SAlexander Duyck 2055cb7b593cSStephen Hemminger return n; 205619baf839SRobert Olsson } 205719baf839SRobert Olsson 2058cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 205988bae714SAlexander Duyck pkey = pn->key; 206088bae714SAlexander Duyck pn = node_parent_rcu(pn); 206188bae714SAlexander Duyck cindex = get_index(pkey, pn) + 1; 2062cb7b593cSStephen Hemminger --iter->depth; 2063cb7b593cSStephen Hemminger } 2064cb7b593cSStephen Hemminger 206588bae714SAlexander Duyck /* record root node so further searches know we are done */ 206688bae714SAlexander Duyck iter->tnode = pn; 206788bae714SAlexander Duyck iter->index = 0; 206888bae714SAlexander Duyck 2069cb7b593cSStephen Hemminger return NULL; 2070cb7b593cSStephen Hemminger } 2071cb7b593cSStephen Hemminger 207235c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2073cb7b593cSStephen Hemminger struct trie *t) 2074cb7b593cSStephen Hemminger { 2075f38b24c9SFiro Yang struct key_vector *n, *pn; 20765ddf0eb2SRobert Olsson 20775ddf0eb2SRobert Olsson if (!t) 20785ddf0eb2SRobert Olsson return NULL; 20795ddf0eb2SRobert Olsson 2080f38b24c9SFiro Yang pn = t->kv; 208188bae714SAlexander Duyck n = rcu_dereference(pn->tnode[0]); 20823d3b2d25SStephen Hemminger if (!n) 20835ddf0eb2SRobert Olsson return NULL; 2084cb7b593cSStephen Hemminger 20856640e697SEric W. Biederman if (IS_TNODE(n)) { 2086adaf9816SAlexander Duyck iter->tnode = n; 2087cb7b593cSStephen Hemminger iter->index = 0; 20881d25cd6cSRobert Olsson iter->depth = 1; 20896640e697SEric W. Biederman } else { 209088bae714SAlexander Duyck iter->tnode = pn; 20916640e697SEric W. Biederman iter->index = 0; 20926640e697SEric W. Biederman iter->depth = 0; 20936640e697SEric W. Biederman } 20943d3b2d25SStephen Hemminger 2095cb7b593cSStephen Hemminger return n; 2096cb7b593cSStephen Hemminger } 2097cb7b593cSStephen Hemminger 2098cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 209919baf839SRobert Olsson { 210035c6edacSAlexander Duyck struct key_vector *n; 2101cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2102cb7b593cSStephen Hemminger 2103cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 210419baf839SRobert Olsson 21052373ce1cSRobert Olsson rcu_read_lock(); 21063d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2107cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 210879e5ad2cSAlexander Duyck struct fib_alias *fa; 210993672292SStephen Hemminger 211019baf839SRobert Olsson s->leaves++; 2111cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2112cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2113cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 211493672292SStephen Hemminger 211579e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) 211693672292SStephen Hemminger ++s->prefixes; 211791b9a277SOlof Johansson } else { 211819baf839SRobert Olsson s->tnodes++; 2119adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 2120adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 21216e22d174SAlexander Duyck s->nullpointers += tn_info(n)->empty_children; 212219baf839SRobert Olsson } 212398293e8dSAlexander Duyck } 21242373ce1cSRobert Olsson rcu_read_unlock(); 212519baf839SRobert Olsson } 212619baf839SRobert Olsson 212719baf839SRobert Olsson /* 212819baf839SRobert Olsson * This outputs /proc/net/fib_triestats 212919baf839SRobert Olsson */ 2130cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 213119baf839SRobert Olsson { 2132a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 213319baf839SRobert Olsson 213419baf839SRobert Olsson if (stat->leaves) 213519baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 213619baf839SRobert Olsson else 213719baf839SRobert Olsson avdepth = 0; 213819baf839SRobert Olsson 2139a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2140a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2141cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2142cb7b593cSStephen Hemminger 2143cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 214441b489fdSAlexander Duyck bytes = LEAF_SIZE * stat->leaves; 214593672292SStephen Hemminger 214693672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 214779e5ad2cSAlexander Duyck bytes += sizeof(struct fib_alias) * stat->prefixes; 214893672292SStephen Hemminger 2149187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 215041b489fdSAlexander Duyck bytes += TNODE_SIZE(0) * stat->tnodes; 215119baf839SRobert Olsson 215206ef921dSRobert Olsson max = MAX_STAT_DEPTH; 215306ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 215419baf839SRobert Olsson max--; 215519baf839SRobert Olsson 2156cb7b593cSStephen Hemminger pointers = 0; 2157f585a991SJerry Snitselaar for (i = 1; i < max; i++) 215819baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2159187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 216019baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 216119baf839SRobert Olsson } 2162cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2163187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2164cb7b593cSStephen Hemminger 216535c6edacSAlexander Duyck bytes += sizeof(struct key_vector *) * pointers; 2166187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2167187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 216866a2f7fdSStephen Hemminger } 216919baf839SRobert Olsson 217019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 217166a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 21728274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 217366a2f7fdSStephen Hemminger { 21748274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 21758274a97aSAlexander Duyck int cpu; 21768274a97aSAlexander Duyck 21778274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 21788274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 21798274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 21808274a97aSAlexander Duyck 21818274a97aSAlexander Duyck s.gets += pcpu->gets; 21828274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 21838274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 21848274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 21858274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 21868274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 21878274a97aSAlexander Duyck } 21888274a97aSAlexander Duyck 218966a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 21908274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 21918274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2192a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 21938274a97aSAlexander Duyck s.semantic_match_passed); 21948274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 21958274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 21968274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 219719baf839SRobert Olsson } 219866a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 219966a2f7fdSStephen Hemminger 22003d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2201d717a9a6SStephen Hemminger { 22023d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 22033d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 22043d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 22053d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 22063d3b2d25SStephen Hemminger else 22073d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2208d717a9a6SStephen Hemminger } 220919baf839SRobert Olsson 22103d3b2d25SStephen Hemminger 221119baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 221219baf839SRobert Olsson { 22131c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 22143d3b2d25SStephen Hemminger unsigned int h; 2215877a9bffSEric W. Biederman 2216d717a9a6SStephen Hemminger seq_printf(seq, 2217a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2218a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 221941b489fdSAlexander Duyck LEAF_SIZE, TNODE_SIZE(0)); 222019baf839SRobert Olsson 22213d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22223d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22233d3b2d25SStephen Hemminger struct fib_table *tb; 2224cb7b593cSStephen Hemminger 2225b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 22263d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 22273d3b2d25SStephen Hemminger struct trie_stat stat; 22283d3b2d25SStephen Hemminger 22293d3b2d25SStephen Hemminger if (!t) 22303d3b2d25SStephen Hemminger continue; 22313d3b2d25SStephen Hemminger 22323d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22333d3b2d25SStephen Hemminger 22343d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22353d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22363d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22378274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 22383d3b2d25SStephen Hemminger #endif 22393d3b2d25SStephen Hemminger } 22403d3b2d25SStephen Hemminger } 2241cb7b593cSStephen Hemminger 224219baf839SRobert Olsson return 0; 224319baf839SRobert Olsson } 224419baf839SRobert Olsson 224519baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 224619baf839SRobert Olsson { 2247de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 22481c340b2fSDenis V. Lunev } 22491c340b2fSDenis V. Lunev 22509a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 225119baf839SRobert Olsson .owner = THIS_MODULE, 225219baf839SRobert Olsson .open = fib_triestat_seq_open, 225319baf839SRobert Olsson .read = seq_read, 225419baf839SRobert Olsson .llseek = seq_lseek, 2255b6fcbdb4SPavel Emelyanov .release = single_release_net, 225619baf839SRobert Olsson }; 225719baf839SRobert Olsson 225835c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 225919baf839SRobert Olsson { 22601218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 22611218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2262cb7b593cSStephen Hemminger loff_t idx = 0; 22633d3b2d25SStephen Hemminger unsigned int h; 22643d3b2d25SStephen Hemminger 22653d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22663d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22673d3b2d25SStephen Hemminger struct fib_table *tb; 22683d3b2d25SStephen Hemminger 2269b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 227035c6edacSAlexander Duyck struct key_vector *n; 2271cb7b593cSStephen Hemminger 22723d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 22733d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 22743d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 22753d3b2d25SStephen Hemminger if (pos == idx++) { 22763d3b2d25SStephen Hemminger iter->tb = tb; 2277cb7b593cSStephen Hemminger return n; 227819baf839SRobert Olsson } 22793d3b2d25SStephen Hemminger } 22803d3b2d25SStephen Hemminger } 228119baf839SRobert Olsson 228219baf839SRobert Olsson return NULL; 228319baf839SRobert Olsson } 228419baf839SRobert Olsson 228519baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2286c95aaf9aSStephen Hemminger __acquires(RCU) 228719baf839SRobert Olsson { 2288cb7b593cSStephen Hemminger rcu_read_lock(); 22891218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 229019baf839SRobert Olsson } 229119baf839SRobert Olsson 229219baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 229319baf839SRobert Olsson { 2294cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 22951218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 22963d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 22973d3b2d25SStephen Hemminger struct hlist_node *tb_node; 22983d3b2d25SStephen Hemminger unsigned int h; 229935c6edacSAlexander Duyck struct key_vector *n; 2300cb7b593cSStephen Hemminger 230119baf839SRobert Olsson ++*pos; 23023d3b2d25SStephen Hemminger /* next node in same table */ 23033d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 23043d3b2d25SStephen Hemminger if (n) 23053d3b2d25SStephen Hemminger return n; 230691b9a277SOlof Johansson 23073d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 23083d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 23090a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 23103d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 23113d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23123d3b2d25SStephen Hemminger if (n) 23133d3b2d25SStephen Hemminger goto found; 23143d3b2d25SStephen Hemminger } 2315cb7b593cSStephen Hemminger 23163d3b2d25SStephen Hemminger /* new hash chain */ 23173d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 23183d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2319b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 23203d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23213d3b2d25SStephen Hemminger if (n) 23223d3b2d25SStephen Hemminger goto found; 23233d3b2d25SStephen Hemminger } 23243d3b2d25SStephen Hemminger } 2325cb7b593cSStephen Hemminger return NULL; 23263d3b2d25SStephen Hemminger 23273d3b2d25SStephen Hemminger found: 23283d3b2d25SStephen Hemminger iter->tb = tb; 23293d3b2d25SStephen Hemminger return n; 233019baf839SRobert Olsson } 233119baf839SRobert Olsson 233219baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2333c95aaf9aSStephen Hemminger __releases(RCU) 233419baf839SRobert Olsson { 2335cb7b593cSStephen Hemminger rcu_read_unlock(); 233619baf839SRobert Olsson } 233719baf839SRobert Olsson 2338cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2339cb7b593cSStephen Hemminger { 2340a034ee3cSEric Dumazet while (n-- > 0) 2341a034ee3cSEric Dumazet seq_puts(seq, " "); 2342cb7b593cSStephen Hemminger } 234319baf839SRobert Olsson 234428d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2345cb7b593cSStephen Hemminger { 2346cb7b593cSStephen Hemminger switch (s) { 2347cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2348cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2349cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2350cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2351cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2352cb7b593cSStephen Hemminger default: 235328d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2354cb7b593cSStephen Hemminger return buf; 2355cb7b593cSStephen Hemminger } 2356cb7b593cSStephen Hemminger } 2357cb7b593cSStephen Hemminger 235836cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2359cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2360cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2361cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2362cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2363cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2364cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2365cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2366cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2367cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2368cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2369cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2370cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2371cb7b593cSStephen Hemminger }; 2372cb7b593cSStephen Hemminger 2373a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2374cb7b593cSStephen Hemminger { 2375cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2376cb7b593cSStephen Hemminger return rtn_type_names[t]; 237728d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2378cb7b593cSStephen Hemminger return buf; 2379cb7b593cSStephen Hemminger } 2380cb7b593cSStephen Hemminger 2381cb7b593cSStephen Hemminger /* Pretty print the trie */ 238219baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 238319baf839SRobert Olsson { 2384cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 238535c6edacSAlexander Duyck struct key_vector *n = v; 238619baf839SRobert Olsson 238788bae714SAlexander Duyck if (IS_TRIE(node_parent_rcu(n))) 23883d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2389095b8501SRobert Olsson 2390095b8501SRobert Olsson if (IS_TNODE(n)) { 2391adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2392095b8501SRobert Olsson 23931d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2394e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2395e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 23966e22d174SAlexander Duyck tn_info(n)->full_children, 23976e22d174SAlexander Duyck tn_info(n)->empty_children); 2398cb7b593cSStephen Hemminger } else { 2399adaf9816SAlexander Duyck __be32 val = htonl(n->key); 240079e5ad2cSAlexander Duyck struct fib_alias *fa; 2401cb7b593cSStephen Hemminger 2402cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2403673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 240428d36e37SEric Dumazet 240579e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 240628d36e37SEric Dumazet char buf1[32], buf2[32]; 240728d36e37SEric Dumazet 2408cb7b593cSStephen Hemminger seq_indent(seq, iter->depth + 1); 24095786ec60SAlexander Duyck seq_printf(seq, " /%zu %s %s", 24109b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 241128d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 241237e826c5SDavid S. Miller fa->fa_info->fib_scope), 241328d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 241428d36e37SEric Dumazet fa->fa_type)); 2415cb7b593cSStephen Hemminger if (fa->fa_tos) 2416b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2417cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2418cb7b593cSStephen Hemminger } 2419cb7b593cSStephen Hemminger } 242019baf839SRobert Olsson 242119baf839SRobert Olsson return 0; 242219baf839SRobert Olsson } 242319baf839SRobert Olsson 2424f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 242519baf839SRobert Olsson .start = fib_trie_seq_start, 242619baf839SRobert Olsson .next = fib_trie_seq_next, 242719baf839SRobert Olsson .stop = fib_trie_seq_stop, 242819baf839SRobert Olsson .show = fib_trie_seq_show, 242919baf839SRobert Olsson }; 243019baf839SRobert Olsson 243119baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 243219baf839SRobert Olsson { 24331c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2434cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 243519baf839SRobert Olsson } 243619baf839SRobert Olsson 24379a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 243819baf839SRobert Olsson .owner = THIS_MODULE, 243919baf839SRobert Olsson .open = fib_trie_seq_open, 244019baf839SRobert Olsson .read = seq_read, 244119baf839SRobert Olsson .llseek = seq_lseek, 24421c340b2fSDenis V. Lunev .release = seq_release_net, 244319baf839SRobert Olsson }; 244419baf839SRobert Olsson 24458315f5d8SStephen Hemminger struct fib_route_iter { 24468315f5d8SStephen Hemminger struct seq_net_private p; 24478be33e95SAlexander Duyck struct fib_table *main_tb; 244835c6edacSAlexander Duyck struct key_vector *tnode; 24498315f5d8SStephen Hemminger loff_t pos; 24508315f5d8SStephen Hemminger t_key key; 24518315f5d8SStephen Hemminger }; 24528315f5d8SStephen Hemminger 245335c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, 245435c6edacSAlexander Duyck loff_t pos) 24558315f5d8SStephen Hemminger { 24568be33e95SAlexander Duyck struct fib_table *tb = iter->main_tb; 245735c6edacSAlexander Duyck struct key_vector *l, **tp = &iter->tnode; 24588be33e95SAlexander Duyck struct trie *t; 24598be33e95SAlexander Duyck t_key key; 24608315f5d8SStephen Hemminger 24618be33e95SAlexander Duyck /* use cache location of next-to-find key */ 24628be33e95SAlexander Duyck if (iter->pos > 0 && pos >= iter->pos) { 24638315f5d8SStephen Hemminger pos -= iter->pos; 24648be33e95SAlexander Duyck key = iter->key; 24658be33e95SAlexander Duyck } else { 24668be33e95SAlexander Duyck t = (struct trie *)tb->tb_data; 246788bae714SAlexander Duyck iter->tnode = t->kv; 24688315f5d8SStephen Hemminger iter->pos = 0; 24698be33e95SAlexander Duyck key = 0; 24708315f5d8SStephen Hemminger } 24718315f5d8SStephen Hemminger 24728be33e95SAlexander Duyck while ((l = leaf_walk_rcu(tp, key)) != NULL) { 24738be33e95SAlexander Duyck key = l->key + 1; 24748315f5d8SStephen Hemminger iter->pos++; 24758be33e95SAlexander Duyck 247625b97c01SAndy Whitcroft if (--pos <= 0) 24778be33e95SAlexander Duyck break; 24788be33e95SAlexander Duyck 24798be33e95SAlexander Duyck l = NULL; 24808be33e95SAlexander Duyck 24818be33e95SAlexander Duyck /* handle unlikely case of a key wrap */ 24828be33e95SAlexander Duyck if (!key) 24838be33e95SAlexander Duyck break; 24848315f5d8SStephen Hemminger } 24858315f5d8SStephen Hemminger 24868315f5d8SStephen Hemminger if (l) 24878be33e95SAlexander Duyck iter->key = key; /* remember it */ 24888315f5d8SStephen Hemminger else 24898315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 24908315f5d8SStephen Hemminger 24918315f5d8SStephen Hemminger return l; 24928315f5d8SStephen Hemminger } 24938315f5d8SStephen Hemminger 24948315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 24958315f5d8SStephen Hemminger __acquires(RCU) 24968315f5d8SStephen Hemminger { 24978315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 24988315f5d8SStephen Hemminger struct fib_table *tb; 24998be33e95SAlexander Duyck struct trie *t; 25008315f5d8SStephen Hemminger 25018315f5d8SStephen Hemminger rcu_read_lock(); 25028be33e95SAlexander Duyck 25031218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 25048315f5d8SStephen Hemminger if (!tb) 25058315f5d8SStephen Hemminger return NULL; 25068315f5d8SStephen Hemminger 25078be33e95SAlexander Duyck iter->main_tb = tb; 25088be33e95SAlexander Duyck 25098be33e95SAlexander Duyck if (*pos != 0) 25108be33e95SAlexander Duyck return fib_route_get_idx(iter, *pos); 25118be33e95SAlexander Duyck 25128be33e95SAlexander Duyck t = (struct trie *)tb->tb_data; 251388bae714SAlexander Duyck iter->tnode = t->kv; 25148be33e95SAlexander Duyck iter->pos = 0; 25158be33e95SAlexander Duyck iter->key = 0; 25168be33e95SAlexander Duyck 25178315f5d8SStephen Hemminger return SEQ_START_TOKEN; 25188315f5d8SStephen Hemminger } 25198315f5d8SStephen Hemminger 25208315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25218315f5d8SStephen Hemminger { 25228315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 252335c6edacSAlexander Duyck struct key_vector *l = NULL; 25248be33e95SAlexander Duyck t_key key = iter->key; 25258315f5d8SStephen Hemminger 25268315f5d8SStephen Hemminger ++*pos; 25278be33e95SAlexander Duyck 25288be33e95SAlexander Duyck /* only allow key of 0 for start of sequence */ 25298be33e95SAlexander Duyck if ((v == SEQ_START_TOKEN) || key) 25308be33e95SAlexander Duyck l = leaf_walk_rcu(&iter->tnode, key); 25318be33e95SAlexander Duyck 25328be33e95SAlexander Duyck if (l) { 25338be33e95SAlexander Duyck iter->key = l->key + 1; 25348315f5d8SStephen Hemminger iter->pos++; 25358be33e95SAlexander Duyck } else { 25368be33e95SAlexander Duyck iter->pos = 0; 25378315f5d8SStephen Hemminger } 25388315f5d8SStephen Hemminger 25398315f5d8SStephen Hemminger return l; 25408315f5d8SStephen Hemminger } 25418315f5d8SStephen Hemminger 25428315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 25438315f5d8SStephen Hemminger __releases(RCU) 25448315f5d8SStephen Hemminger { 25458315f5d8SStephen Hemminger rcu_read_unlock(); 25468315f5d8SStephen Hemminger } 25478315f5d8SStephen Hemminger 2548a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2549cb7b593cSStephen Hemminger { 2550a034ee3cSEric Dumazet unsigned int flags = 0; 2551cb7b593cSStephen Hemminger 2552a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2553a034ee3cSEric Dumazet flags = RTF_REJECT; 2554cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2555cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 255632ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2557cb7b593cSStephen Hemminger flags |= RTF_HOST; 2558cb7b593cSStephen Hemminger flags |= RTF_UP; 2559cb7b593cSStephen Hemminger return flags; 2560cb7b593cSStephen Hemminger } 2561cb7b593cSStephen Hemminger 2562cb7b593cSStephen Hemminger /* 2563cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2564cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2565cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2566cb7b593cSStephen Hemminger * legacy utilities 2567cb7b593cSStephen Hemminger */ 2568cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2569cb7b593cSStephen Hemminger { 2570654eff45SAlexander Duyck struct fib_route_iter *iter = seq->private; 2571654eff45SAlexander Duyck struct fib_table *tb = iter->main_tb; 257279e5ad2cSAlexander Duyck struct fib_alias *fa; 257335c6edacSAlexander Duyck struct key_vector *l = v; 25749b6ebad5SAlexander Duyck __be32 prefix; 2575cb7b593cSStephen Hemminger 2576cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2577cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2578cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2579cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2580cb7b593cSStephen Hemminger return 0; 2581cb7b593cSStephen Hemminger } 2582cb7b593cSStephen Hemminger 25839b6ebad5SAlexander Duyck prefix = htonl(l->key); 25849b6ebad5SAlexander Duyck 258579e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 25861371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 25879b6ebad5SAlexander Duyck __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); 2588a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2589cb7b593cSStephen Hemminger 259079e5ad2cSAlexander Duyck if ((fa->fa_type == RTN_BROADCAST) || 259179e5ad2cSAlexander Duyck (fa->fa_type == RTN_MULTICAST)) 2592cb7b593cSStephen Hemminger continue; 2593cb7b593cSStephen Hemminger 2594654eff45SAlexander Duyck if (fa->tb_id != tb->tb_id) 2595654eff45SAlexander Duyck continue; 2596654eff45SAlexander Duyck 2597652586dfSTetsuo Handa seq_setwidth(seq, 127); 2598652586dfSTetsuo Handa 2599cb7b593cSStephen Hemminger if (fi) 26005e659e4cSPavel Emelyanov seq_printf(seq, 26015e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2602652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2603cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2604cb7b593cSStephen Hemminger prefix, 2605cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2606cb7b593cSStephen Hemminger fi->fib_priority, 2607cb7b593cSStephen Hemminger mask, 2608a07f5f50SStephen Hemminger (fi->fib_advmss ? 2609a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2610cb7b593cSStephen Hemminger fi->fib_window, 2611652586dfSTetsuo Handa fi->fib_rtt >> 3); 2612cb7b593cSStephen Hemminger else 26135e659e4cSPavel Emelyanov seq_printf(seq, 26145e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2615652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2616cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2617652586dfSTetsuo Handa mask, 0, 0, 0); 2618cb7b593cSStephen Hemminger 2619652586dfSTetsuo Handa seq_pad(seq, '\n'); 2620cb7b593cSStephen Hemminger } 2621cb7b593cSStephen Hemminger 2622cb7b593cSStephen Hemminger return 0; 2623cb7b593cSStephen Hemminger } 2624cb7b593cSStephen Hemminger 2625f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 26268315f5d8SStephen Hemminger .start = fib_route_seq_start, 26278315f5d8SStephen Hemminger .next = fib_route_seq_next, 26288315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2629cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2630cb7b593cSStephen Hemminger }; 2631cb7b593cSStephen Hemminger 2632cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2633cb7b593cSStephen Hemminger { 26341c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 26358315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2636cb7b593cSStephen Hemminger } 2637cb7b593cSStephen Hemminger 26389a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2639cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2640cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2641cb7b593cSStephen Hemminger .read = seq_read, 2642cb7b593cSStephen Hemminger .llseek = seq_lseek, 26431c340b2fSDenis V. Lunev .release = seq_release_net, 2644cb7b593cSStephen Hemminger }; 2645cb7b593cSStephen Hemminger 264661a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 264719baf839SRobert Olsson { 2648d4beaa66SGao feng if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) 2649cb7b593cSStephen Hemminger goto out1; 2650cb7b593cSStephen Hemminger 2651d4beaa66SGao feng if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, 265261a02653SDenis V. Lunev &fib_triestat_fops)) 2653cb7b593cSStephen Hemminger goto out2; 2654cb7b593cSStephen Hemminger 2655d4beaa66SGao feng if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) 2656cb7b593cSStephen Hemminger goto out3; 2657cb7b593cSStephen Hemminger 265819baf839SRobert Olsson return 0; 2659cb7b593cSStephen Hemminger 2660cb7b593cSStephen Hemminger out3: 2661ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2662cb7b593cSStephen Hemminger out2: 2663ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2664cb7b593cSStephen Hemminger out1: 2665cb7b593cSStephen Hemminger return -ENOMEM; 266619baf839SRobert Olsson } 266719baf839SRobert Olsson 266861a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 266919baf839SRobert Olsson { 2670ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2671ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2672ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 267319baf839SRobert Olsson } 267419baf839SRobert Olsson 267519baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2676