119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 541977f032SJiri Slaby #include <linux/bitops.h> 5519baf839SRobert Olsson #include <linux/types.h> 5619baf839SRobert Olsson #include <linux/kernel.h> 5719baf839SRobert Olsson #include <linux/mm.h> 5819baf839SRobert Olsson #include <linux/string.h> 5919baf839SRobert Olsson #include <linux/socket.h> 6019baf839SRobert Olsson #include <linux/sockios.h> 6119baf839SRobert Olsson #include <linux/errno.h> 6219baf839SRobert Olsson #include <linux/in.h> 6319baf839SRobert Olsson #include <linux/inet.h> 64cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6519baf839SRobert Olsson #include <linux/netdevice.h> 6619baf839SRobert Olsson #include <linux/if_arp.h> 6719baf839SRobert Olsson #include <linux/proc_fs.h> 682373ce1cSRobert Olsson #include <linux/rcupdate.h> 6919baf839SRobert Olsson #include <linux/skbuff.h> 7019baf839SRobert Olsson #include <linux/netlink.h> 7119baf839SRobert Olsson #include <linux/init.h> 7219baf839SRobert Olsson #include <linux/list.h> 735a0e3ad6STejun Heo #include <linux/slab.h> 74bc3b2d7fSPaul Gortmaker #include <linux/export.h> 75457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7619baf839SRobert Olsson #include <net/ip.h> 7719baf839SRobert Olsson #include <net/protocol.h> 7819baf839SRobert Olsson #include <net/route.h> 7919baf839SRobert Olsson #include <net/tcp.h> 8019baf839SRobert Olsson #include <net/sock.h> 8119baf839SRobert Olsson #include <net/ip_fib.h> 8219baf839SRobert Olsson #include "fib_lookup.h" 8319baf839SRobert Olsson 8406ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8519baf839SRobert Olsson 8619baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8795f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 8819baf839SRobert Olsson 8919baf839SRobert Olsson typedef unsigned int t_key; 9019baf839SRobert Olsson 9164c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 9264c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 932373ce1cSRobert Olsson 94e9b44019SAlexander Duyck #define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) 959f9e636dSAlexander Duyck 9664c9b6fbSAlexander Duyck struct tnode { 9764c9b6fbSAlexander Duyck t_key key; 9864c9b6fbSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 9964c9b6fbSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 1005405afd1SAlexander Duyck unsigned char slen; 10164c9b6fbSAlexander Duyck struct tnode __rcu *parent; 10264c9b6fbSAlexander Duyck struct rcu_head rcu; 103adaf9816SAlexander Duyck union { 104adaf9816SAlexander Duyck /* The fields in this struct are valid if bits > 0 (TNODE) */ 105adaf9816SAlexander Duyck struct { 10695f60ea3SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 10795f60ea3SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 108adaf9816SAlexander Duyck struct tnode __rcu *child[0]; 10964c9b6fbSAlexander Duyck }; 110adaf9816SAlexander Duyck /* This list pointer if valid if bits == 0 (LEAF) */ 11164c9b6fbSAlexander Duyck struct hlist_head list; 11219baf839SRobert Olsson }; 113adaf9816SAlexander Duyck }; 11419baf839SRobert Olsson 11519baf839SRobert Olsson struct leaf_info { 11619baf839SRobert Olsson struct hlist_node hlist; 11719baf839SRobert Olsson int plen; 1185c74501fSEric Dumazet u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ 11919baf839SRobert Olsson struct list_head falh; 1205c74501fSEric Dumazet struct rcu_head rcu; 12119baf839SRobert Olsson }; 12219baf839SRobert Olsson 12319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 12419baf839SRobert Olsson struct trie_use_stats { 12519baf839SRobert Olsson unsigned int gets; 12619baf839SRobert Olsson unsigned int backtrack; 12719baf839SRobert Olsson unsigned int semantic_match_passed; 12819baf839SRobert Olsson unsigned int semantic_match_miss; 12919baf839SRobert Olsson unsigned int null_node_hit; 1302f36895aSRobert Olsson unsigned int resize_node_skipped; 13119baf839SRobert Olsson }; 13219baf839SRobert Olsson #endif 13319baf839SRobert Olsson 13419baf839SRobert Olsson struct trie_stat { 13519baf839SRobert Olsson unsigned int totdepth; 13619baf839SRobert Olsson unsigned int maxdepth; 13719baf839SRobert Olsson unsigned int tnodes; 13819baf839SRobert Olsson unsigned int leaves; 13919baf839SRobert Olsson unsigned int nullpointers; 14093672292SStephen Hemminger unsigned int prefixes; 14106ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 14219baf839SRobert Olsson }; 14319baf839SRobert Olsson 14419baf839SRobert Olsson struct trie { 145adaf9816SAlexander Duyck struct tnode __rcu *trie; 14619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1478274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 14819baf839SRobert Olsson #endif 14919baf839SRobert Olsson }; 15019baf839SRobert Olsson 151ff181ed8SAlexander Duyck static void resize(struct trie *t, struct tnode *tn); 152c3059477SJarek Poplawski static size_t tnode_free_size; 153c3059477SJarek Poplawski 154c3059477SJarek Poplawski /* 155c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 156c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 157c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 158c3059477SJarek Poplawski */ 159c3059477SJarek Poplawski static const int sync_pages = 128; 16019baf839SRobert Olsson 161e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 162bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 16319baf839SRobert Olsson 16464c9b6fbSAlexander Duyck /* caller must hold RTNL */ 16564c9b6fbSAlexander Duyck #define node_parent(n) rtnl_dereference((n)->parent) 1660a5c0475SEric Dumazet 16764c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 16864c9b6fbSAlexander Duyck #define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) 1690a5c0475SEric Dumazet 17064c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 171adaf9816SAlexander Duyck static inline void node_set_parent(struct tnode *n, struct tnode *tp) 17206801916SStephen Hemminger { 173adaf9816SAlexander Duyck if (n) 174adaf9816SAlexander Duyck rcu_assign_pointer(n->parent, tp); 17564c9b6fbSAlexander Duyck } 17664c9b6fbSAlexander Duyck 17764c9b6fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) 17864c9b6fbSAlexander Duyck 17964c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 18064c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 18164c9b6fbSAlexander Duyck */ 18298293e8dSAlexander Duyck static inline unsigned long tnode_child_length(const struct tnode *tn) 18364c9b6fbSAlexander Duyck { 18464c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 18506801916SStephen Hemminger } 1862373ce1cSRobert Olsson 18798293e8dSAlexander Duyck /* caller must hold RTNL */ 18898293e8dSAlexander Duyck static inline struct tnode *tnode_get_child(const struct tnode *tn, 18998293e8dSAlexander Duyck unsigned long i) 19019baf839SRobert Olsson { 1910a5c0475SEric Dumazet return rtnl_dereference(tn->child[i]); 192b59cfbf7SEric Dumazet } 193b59cfbf7SEric Dumazet 19498293e8dSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 19598293e8dSAlexander Duyck static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, 19698293e8dSAlexander Duyck unsigned long i) 197b59cfbf7SEric Dumazet { 1980a5c0475SEric Dumazet return rcu_dereference_rtnl(tn->child[i]); 19919baf839SRobert Olsson } 20019baf839SRobert Olsson 201e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 202e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 203e9b44019SAlexander Duyck * all of the bits in that key are significant. 204e9b44019SAlexander Duyck * 205e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 206e9b44019SAlexander Duyck * 207e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 208e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 209e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 210e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 211e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 212e9b44019SAlexander Duyck * correct key path. 213e9b44019SAlexander Duyck * 214e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 215e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 216e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 217e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 218e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 219e9b44019SAlexander Duyck * 220e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 221e9b44019SAlexander Duyck * have many different meanings. 222e9b44019SAlexander Duyck * 223e9b44019SAlexander Duyck * Example: 224e9b44019SAlexander Duyck * _________________________________________________________________ 225e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 226e9b44019SAlexander Duyck * ----------------------------------------------------------------- 227e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 228e9b44019SAlexander Duyck * 229e9b44019SAlexander Duyck * _________________________________________________________________ 230e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 231e9b44019SAlexander Duyck * ----------------------------------------------------------------- 232e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 233e9b44019SAlexander Duyck * 234e9b44019SAlexander Duyck * tp->pos = 22 235e9b44019SAlexander Duyck * tp->bits = 3 236e9b44019SAlexander Duyck * n->pos = 13 237e9b44019SAlexander Duyck * n->bits = 4 238e9b44019SAlexander Duyck * 239e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 240e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 241e9b44019SAlexander Duyck * point we do not use them for anything. 242e9b44019SAlexander Duyck * 243e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 244e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 245e9b44019SAlexander Duyck * 'n' among tp's children. 246e9b44019SAlexander Duyck * 247e9b44019SAlexander Duyck * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits 248e9b44019SAlexander Duyck * for the node n. 249e9b44019SAlexander Duyck * 250e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 251e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 252e9b44019SAlexander Duyck * 253e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 254e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 255e9b44019SAlexander Duyck * 256e9b44019SAlexander Duyck * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown 257e9b44019SAlexander Duyck * at this point. 25819baf839SRobert Olsson */ 25919baf839SRobert Olsson 260f5026fabSDenis V. Lunev static const int halve_threshold = 25; 261f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 262345aa031SJarek Poplawski static const int halve_threshold_root = 15; 26380b71b80SJens Låås static const int inflate_threshold_root = 30; 2642373ce1cSRobert Olsson 2652373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 2662373ce1cSRobert Olsson { 2672373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 2682373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 2692373ce1cSRobert Olsson } 2702373ce1cSRobert Olsson 2712373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 2722373ce1cSRobert Olsson { 2732373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 2742373ce1cSRobert Olsson } 2752373ce1cSRobert Olsson 27637fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 277adaf9816SAlexander Duyck ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) 27837fd30f2SAlexander Duyck 27937fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 2802373ce1cSRobert Olsson { 281adaf9816SAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 28237fd30f2SAlexander Duyck 28337fd30f2SAlexander Duyck if (IS_LEAF(n)) 28437fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 28537fd30f2SAlexander Duyck else if (n->bits <= TNODE_KMALLOC_MAX) 28637fd30f2SAlexander Duyck kfree(n); 28737fd30f2SAlexander Duyck else 28837fd30f2SAlexander Duyck vfree(n); 2892373ce1cSRobert Olsson } 2902373ce1cSRobert Olsson 29137fd30f2SAlexander Duyck #define node_free(n) call_rcu(&n->rcu, __node_free_rcu) 292387a5487SStephen Hemminger 2932373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 2942373ce1cSRobert Olsson { 295bceb0f45SLai Jiangshan kfree_rcu(leaf, rcu); 2962373ce1cSRobert Olsson } 2972373ce1cSRobert Olsson 2988d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size) 2992373ce1cSRobert Olsson { 3002373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3018d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 30215be75cdSStephen Hemminger else 3037a1c8e5aSEric Dumazet return vzalloc(size); 30415be75cdSStephen Hemminger } 3052373ce1cSRobert Olsson 30695f60ea3SAlexander Duyck static inline void empty_child_inc(struct tnode *n) 30795f60ea3SAlexander Duyck { 30895f60ea3SAlexander Duyck ++n->empty_children ? : ++n->full_children; 30995f60ea3SAlexander Duyck } 31095f60ea3SAlexander Duyck 31195f60ea3SAlexander Duyck static inline void empty_child_dec(struct tnode *n) 31295f60ea3SAlexander Duyck { 31395f60ea3SAlexander Duyck n->empty_children-- ? : n->full_children--; 31495f60ea3SAlexander Duyck } 31595f60ea3SAlexander Duyck 316adaf9816SAlexander Duyck static struct tnode *leaf_new(t_key key) 31719baf839SRobert Olsson { 318adaf9816SAlexander Duyck struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 31919baf839SRobert Olsson if (l) { 32064c9b6fbSAlexander Duyck l->parent = NULL; 32164c9b6fbSAlexander Duyck /* set key and pos to reflect full key value 32264c9b6fbSAlexander Duyck * any trailing zeros in the key should be ignored 32364c9b6fbSAlexander Duyck * as the nodes are searched 32464c9b6fbSAlexander Duyck */ 32564c9b6fbSAlexander Duyck l->key = key; 3265405afd1SAlexander Duyck l->slen = 0; 327e9b44019SAlexander Duyck l->pos = 0; 32864c9b6fbSAlexander Duyck /* set bits to 0 indicating we are not a tnode */ 32964c9b6fbSAlexander Duyck l->bits = 0; 33064c9b6fbSAlexander Duyck 33119baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 33219baf839SRobert Olsson } 33319baf839SRobert Olsson return l; 33419baf839SRobert Olsson } 33519baf839SRobert Olsson 33619baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 33719baf839SRobert Olsson { 33819baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 3392373ce1cSRobert Olsson if (li) { 34019baf839SRobert Olsson li->plen = plen; 3415c74501fSEric Dumazet li->mask_plen = ntohl(inet_make_mask(plen)); 34219baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 3432373ce1cSRobert Olsson } 34419baf839SRobert Olsson return li; 34519baf839SRobert Olsson } 34619baf839SRobert Olsson 34719baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits) 34819baf839SRobert Olsson { 34995f60ea3SAlexander Duyck size_t sz = offsetof(struct tnode, child[1ul << bits]); 350f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 35164c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 35264c9b6fbSAlexander Duyck 35364c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 35464c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 35519baf839SRobert Olsson 35619baf839SRobert Olsson if (tn) { 35764c9b6fbSAlexander Duyck tn->parent = NULL; 3585405afd1SAlexander Duyck tn->slen = pos; 35919baf839SRobert Olsson tn->pos = pos; 36019baf839SRobert Olsson tn->bits = bits; 361e9b44019SAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 36295f60ea3SAlexander Duyck if (bits == KEYLENGTH) 36395f60ea3SAlexander Duyck tn->full_children = 1; 36495f60ea3SAlexander Duyck else 36595f60ea3SAlexander Duyck tn->empty_children = 1ul << bits; 36619baf839SRobert Olsson } 367c877efb2SStephen Hemminger 368a034ee3cSEric Dumazet pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 369adaf9816SAlexander Duyck sizeof(struct tnode *) << bits); 37019baf839SRobert Olsson return tn; 37119baf839SRobert Olsson } 37219baf839SRobert Olsson 373e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 37419baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 37519baf839SRobert Olsson */ 376adaf9816SAlexander Duyck static inline int tnode_full(const struct tnode *tn, const struct tnode *n) 37719baf839SRobert Olsson { 378e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 37919baf839SRobert Olsson } 38019baf839SRobert Olsson 381ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 38219baf839SRobert Olsson * Update the value of full_children and empty_children. 38319baf839SRobert Olsson */ 384ff181ed8SAlexander Duyck static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) 38519baf839SRobert Olsson { 38621d1f11dSAlexander Duyck struct tnode *chi = tnode_get_child(tn, i); 387ff181ed8SAlexander Duyck int isfull, wasfull; 38819baf839SRobert Olsson 38998293e8dSAlexander Duyck BUG_ON(i >= tnode_child_length(tn)); 3900c7770c7SStephen Hemminger 39195f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 39219baf839SRobert Olsson if (n == NULL && chi != NULL) 39395f60ea3SAlexander Duyck empty_child_inc(tn); 39495f60ea3SAlexander Duyck if (n != NULL && chi == NULL) 39595f60ea3SAlexander Duyck empty_child_dec(tn); 39619baf839SRobert Olsson 39719baf839SRobert Olsson /* update fullChildren */ 39819baf839SRobert Olsson wasfull = tnode_full(tn, chi); 39919baf839SRobert Olsson isfull = tnode_full(tn, n); 400ff181ed8SAlexander Duyck 40119baf839SRobert Olsson if (wasfull && !isfull) 40219baf839SRobert Olsson tn->full_children--; 40319baf839SRobert Olsson else if (!wasfull && isfull) 40419baf839SRobert Olsson tn->full_children++; 40591b9a277SOlof Johansson 4065405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4075405afd1SAlexander Duyck tn->slen = n->slen; 4085405afd1SAlexander Duyck 409cf778b00SEric Dumazet rcu_assign_pointer(tn->child[i], n); 41019baf839SRobert Olsson } 41119baf839SRobert Olsson 41269fa57b1SAlexander Duyck static void update_children(struct tnode *tn) 41369fa57b1SAlexander Duyck { 41469fa57b1SAlexander Duyck unsigned long i; 41569fa57b1SAlexander Duyck 41669fa57b1SAlexander Duyck /* update all of the child parent pointers */ 41769fa57b1SAlexander Duyck for (i = tnode_child_length(tn); i;) { 41869fa57b1SAlexander Duyck struct tnode *inode = tnode_get_child(tn, --i); 41969fa57b1SAlexander Duyck 42069fa57b1SAlexander Duyck if (!inode) 42169fa57b1SAlexander Duyck continue; 42269fa57b1SAlexander Duyck 42369fa57b1SAlexander Duyck /* Either update the children of a tnode that 42469fa57b1SAlexander Duyck * already belongs to us or update the child 42569fa57b1SAlexander Duyck * to point to ourselves. 42669fa57b1SAlexander Duyck */ 42769fa57b1SAlexander Duyck if (node_parent(inode) == tn) 42869fa57b1SAlexander Duyck update_children(inode); 42969fa57b1SAlexander Duyck else 43069fa57b1SAlexander Duyck node_set_parent(inode, tn); 43169fa57b1SAlexander Duyck } 43269fa57b1SAlexander Duyck } 43369fa57b1SAlexander Duyck 43469fa57b1SAlexander Duyck static inline void put_child_root(struct tnode *tp, struct trie *t, 435836a0123SAlexander Duyck t_key key, struct tnode *n) 436836a0123SAlexander Duyck { 437836a0123SAlexander Duyck if (tp) 438836a0123SAlexander Duyck put_child(tp, get_index(key, tp), n); 439836a0123SAlexander Duyck else 440836a0123SAlexander Duyck rcu_assign_pointer(t->trie, n); 441836a0123SAlexander Duyck } 442836a0123SAlexander Duyck 443fc86a93bSAlexander Duyck static inline void tnode_free_init(struct tnode *tn) 4440a5c0475SEric Dumazet { 445fc86a93bSAlexander Duyck tn->rcu.next = NULL; 4460a5c0475SEric Dumazet } 447fc86a93bSAlexander Duyck 448fc86a93bSAlexander Duyck static inline void tnode_free_append(struct tnode *tn, struct tnode *n) 449fc86a93bSAlexander Duyck { 450fc86a93bSAlexander Duyck n->rcu.next = tn->rcu.next; 451fc86a93bSAlexander Duyck tn->rcu.next = &n->rcu; 452fc86a93bSAlexander Duyck } 453fc86a93bSAlexander Duyck 454fc86a93bSAlexander Duyck static void tnode_free(struct tnode *tn) 455fc86a93bSAlexander Duyck { 456fc86a93bSAlexander Duyck struct callback_head *head = &tn->rcu; 457fc86a93bSAlexander Duyck 458fc86a93bSAlexander Duyck while (head) { 459fc86a93bSAlexander Duyck head = head->next; 460fc86a93bSAlexander Duyck tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); 46137fd30f2SAlexander Duyck node_free(tn); 462fc86a93bSAlexander Duyck 463fc86a93bSAlexander Duyck tn = container_of(head, struct tnode, rcu); 464fc86a93bSAlexander Duyck } 465fc86a93bSAlexander Duyck 466fc86a93bSAlexander Duyck if (tnode_free_size >= PAGE_SIZE * sync_pages) { 467fc86a93bSAlexander Duyck tnode_free_size = 0; 468fc86a93bSAlexander Duyck synchronize_rcu(); 469fc86a93bSAlexander Duyck } 4700a5c0475SEric Dumazet } 4710a5c0475SEric Dumazet 47269fa57b1SAlexander Duyck static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) 47369fa57b1SAlexander Duyck { 47469fa57b1SAlexander Duyck struct tnode *tp = node_parent(oldtnode); 47569fa57b1SAlexander Duyck unsigned long i; 47669fa57b1SAlexander Duyck 47769fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 47869fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 47969fa57b1SAlexander Duyck put_child_root(tp, t, tn->key, tn); 48069fa57b1SAlexander Duyck 48169fa57b1SAlexander Duyck /* update all of the child parent pointers */ 48269fa57b1SAlexander Duyck update_children(tn); 48369fa57b1SAlexander Duyck 48469fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 48569fa57b1SAlexander Duyck tnode_free(oldtnode); 48669fa57b1SAlexander Duyck 48769fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 48869fa57b1SAlexander Duyck for (i = tnode_child_length(tn); i;) { 48969fa57b1SAlexander Duyck struct tnode *inode = tnode_get_child(tn, --i); 49069fa57b1SAlexander Duyck 49169fa57b1SAlexander Duyck /* resize child node */ 49269fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 49369fa57b1SAlexander Duyck resize(t, inode); 49469fa57b1SAlexander Duyck } 49569fa57b1SAlexander Duyck } 49669fa57b1SAlexander Duyck 497ff181ed8SAlexander Duyck static int inflate(struct trie *t, struct tnode *oldtnode) 49819baf839SRobert Olsson { 49969fa57b1SAlexander Duyck struct tnode *tn; 50069fa57b1SAlexander Duyck unsigned long i; 501e9b44019SAlexander Duyck t_key m; 50219baf839SRobert Olsson 5030c7770c7SStephen Hemminger pr_debug("In inflate\n"); 50419baf839SRobert Olsson 505e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5062f80b3c8SRobert Olsson if (!tn) 507ff181ed8SAlexander Duyck return -ENOMEM; 5082f36895aSRobert Olsson 50969fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 51069fa57b1SAlexander Duyck tnode_free_init(oldtnode); 51169fa57b1SAlexander Duyck 51212c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 51312c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 51412c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 51512c081a5SAlexander Duyck * nodes. 5162f36895aSRobert Olsson */ 51712c081a5SAlexander Duyck for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { 51869fa57b1SAlexander Duyck struct tnode *inode = tnode_get_child(oldtnode, --i); 51969fa57b1SAlexander Duyck struct tnode *node0, *node1; 52069fa57b1SAlexander Duyck unsigned long j, k; 52119baf839SRobert Olsson 52219baf839SRobert Olsson /* An empty child */ 523adaf9816SAlexander Duyck if (inode == NULL) 52419baf839SRobert Olsson continue; 52519baf839SRobert Olsson 52619baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 527adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 528e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 52919baf839SRobert Olsson continue; 53019baf839SRobert Olsson } 53119baf839SRobert Olsson 53269fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 53369fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 53469fa57b1SAlexander Duyck 53519baf839SRobert Olsson /* An internal node with two children */ 53619baf839SRobert Olsson if (inode->bits == 1) { 53712c081a5SAlexander Duyck put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); 53812c081a5SAlexander Duyck put_child(tn, 2 * i, tnode_get_child(inode, 0)); 53991b9a277SOlof Johansson continue; 54019baf839SRobert Olsson } 54119baf839SRobert Olsson 54219baf839SRobert Olsson /* We will replace this node 'inode' with two new 54312c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 54419baf839SRobert Olsson * original children. The two new nodes will have 54519baf839SRobert Olsson * a position one bit further down the key and this 54619baf839SRobert Olsson * means that the "significant" part of their keys 54719baf839SRobert Olsson * (see the discussion near the top of this file) 54819baf839SRobert Olsson * will differ by one bit, which will be "0" in 54912c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 55019baf839SRobert Olsson * moving the key position by one step, the bit that 55119baf839SRobert Olsson * we are moving away from - the bit at position 55212c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 55312c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 55419baf839SRobert Olsson * two new keys. 55519baf839SRobert Olsson */ 55612c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 55712c081a5SAlexander Duyck if (!node1) 55812c081a5SAlexander Duyck goto nomem; 55969fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 56019baf839SRobert Olsson 56169fa57b1SAlexander Duyck tnode_free_append(tn, node1); 56212c081a5SAlexander Duyck if (!node0) 56312c081a5SAlexander Duyck goto nomem; 56412c081a5SAlexander Duyck tnode_free_append(tn, node0); 5652f36895aSRobert Olsson 56612c081a5SAlexander Duyck /* populate child pointers in new nodes */ 56712c081a5SAlexander Duyck for (k = tnode_child_length(inode), j = k / 2; j;) { 56812c081a5SAlexander Duyck put_child(node1, --j, tnode_get_child(inode, --k)); 56912c081a5SAlexander Duyck put_child(node0, j, tnode_get_child(inode, j)); 57012c081a5SAlexander Duyck put_child(node1, --j, tnode_get_child(inode, --k)); 57112c081a5SAlexander Duyck put_child(node0, j, tnode_get_child(inode, j)); 57219baf839SRobert Olsson } 573ff181ed8SAlexander Duyck 57412c081a5SAlexander Duyck /* link new nodes to parent */ 57512c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 57612c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 57712c081a5SAlexander Duyck 57812c081a5SAlexander Duyck /* link parent to nodes */ 57912c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 58012c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 58112c081a5SAlexander Duyck } 58212c081a5SAlexander Duyck 58369fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 58469fa57b1SAlexander Duyck replace(t, oldtnode, tn); 58512c081a5SAlexander Duyck 586ff181ed8SAlexander Duyck return 0; 587ff181ed8SAlexander Duyck nomem: 588fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 589fc86a93bSAlexander Duyck tnode_free(tn); 590ff181ed8SAlexander Duyck return -ENOMEM; 591ff181ed8SAlexander Duyck } 592ff181ed8SAlexander Duyck 593ff181ed8SAlexander Duyck static int halve(struct trie *t, struct tnode *oldtnode) 59419baf839SRobert Olsson { 59569fa57b1SAlexander Duyck struct tnode *tn; 59612c081a5SAlexander Duyck unsigned long i; 59719baf839SRobert Olsson 5980c7770c7SStephen Hemminger pr_debug("In halve\n"); 59919baf839SRobert Olsson 600e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6012f80b3c8SRobert Olsson if (!tn) 602ff181ed8SAlexander Duyck return -ENOMEM; 6032f36895aSRobert Olsson 60469fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 60569fa57b1SAlexander Duyck tnode_free_init(oldtnode); 60669fa57b1SAlexander Duyck 60712c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 60812c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 60912c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 61012c081a5SAlexander Duyck * nodes. 6112f36895aSRobert Olsson */ 61212c081a5SAlexander Duyck for (i = tnode_child_length(oldtnode); i;) { 61369fa57b1SAlexander Duyck struct tnode *node1 = tnode_get_child(oldtnode, --i); 61469fa57b1SAlexander Duyck struct tnode *node0 = tnode_get_child(oldtnode, --i); 61569fa57b1SAlexander Duyck struct tnode *inode; 6162f36895aSRobert Olsson 61712c081a5SAlexander Duyck /* At least one of the children is empty */ 61812c081a5SAlexander Duyck if (!node1 || !node0) { 61912c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 62012c081a5SAlexander Duyck continue; 62112c081a5SAlexander Duyck } 6222f36895aSRobert Olsson 6232f36895aSRobert Olsson /* Two nonempty children */ 62412c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 62512c081a5SAlexander Duyck if (!inode) { 626fc86a93bSAlexander Duyck tnode_free(tn); 627ff181ed8SAlexander Duyck return -ENOMEM; 628ff181ed8SAlexander Duyck } 62912c081a5SAlexander Duyck tnode_free_append(tn, inode); 6302f80b3c8SRobert Olsson 63112c081a5SAlexander Duyck /* initialize pointers out of node */ 63212c081a5SAlexander Duyck put_child(inode, 1, node1); 63312c081a5SAlexander Duyck put_child(inode, 0, node0); 63412c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 63512c081a5SAlexander Duyck 63612c081a5SAlexander Duyck /* link parent to node */ 63712c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6382f36895aSRobert Olsson } 6392f36895aSRobert Olsson 64069fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 64169fa57b1SAlexander Duyck replace(t, oldtnode, tn); 642ff181ed8SAlexander Duyck 643ff181ed8SAlexander Duyck return 0; 6442f80b3c8SRobert Olsson } 64519baf839SRobert Olsson 64695f60ea3SAlexander Duyck static void collapse(struct trie *t, struct tnode *oldtnode) 64795f60ea3SAlexander Duyck { 64895f60ea3SAlexander Duyck struct tnode *n, *tp; 64995f60ea3SAlexander Duyck unsigned long i; 65095f60ea3SAlexander Duyck 65195f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 65295f60ea3SAlexander Duyck for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) 65395f60ea3SAlexander Duyck n = tnode_get_child(oldtnode, --i); 65495f60ea3SAlexander Duyck 65595f60ea3SAlexander Duyck /* compress one level */ 65695f60ea3SAlexander Duyck tp = node_parent(oldtnode); 65795f60ea3SAlexander Duyck put_child_root(tp, t, oldtnode->key, n); 65895f60ea3SAlexander Duyck node_set_parent(n, tp); 65995f60ea3SAlexander Duyck 66095f60ea3SAlexander Duyck /* drop dead node */ 66195f60ea3SAlexander Duyck node_free(oldtnode); 66295f60ea3SAlexander Duyck } 66395f60ea3SAlexander Duyck 6645405afd1SAlexander Duyck static unsigned char update_suffix(struct tnode *tn) 6655405afd1SAlexander Duyck { 6665405afd1SAlexander Duyck unsigned char slen = tn->pos; 6675405afd1SAlexander Duyck unsigned long stride, i; 6685405afd1SAlexander Duyck 6695405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 6705405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 6715405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 6725405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 6735405afd1SAlexander Duyck */ 6745405afd1SAlexander Duyck for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { 6755405afd1SAlexander Duyck struct tnode *n = tnode_get_child(tn, i); 6765405afd1SAlexander Duyck 6775405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 6785405afd1SAlexander Duyck continue; 6795405afd1SAlexander Duyck 6805405afd1SAlexander Duyck /* update stride and slen based on new value */ 6815405afd1SAlexander Duyck stride <<= (n->slen - slen); 6825405afd1SAlexander Duyck slen = n->slen; 6835405afd1SAlexander Duyck i &= ~(stride - 1); 6845405afd1SAlexander Duyck 6855405afd1SAlexander Duyck /* if slen covers all but the last bit we can stop here 6865405afd1SAlexander Duyck * there will be nothing longer than that since only node 6875405afd1SAlexander Duyck * 0 and 1 << (bits - 1) could have that as their suffix 6885405afd1SAlexander Duyck * length. 6895405afd1SAlexander Duyck */ 6905405afd1SAlexander Duyck if ((slen + 1) >= (tn->pos + tn->bits)) 6915405afd1SAlexander Duyck break; 6925405afd1SAlexander Duyck } 6935405afd1SAlexander Duyck 6945405afd1SAlexander Duyck tn->slen = slen; 6955405afd1SAlexander Duyck 6965405afd1SAlexander Duyck return slen; 6975405afd1SAlexander Duyck } 6985405afd1SAlexander Duyck 699f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 700cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 701cf3637bbSAlexander Duyck * Telecommunications, page 6: 702cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 703cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 704cf3637bbSAlexander Duyck * 705cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 706cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 707cf3637bbSAlexander Duyck * tnode_child_length() and instead of multiplying by 2 (since the 708cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 709cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 710cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 711cf3637bbSAlexander Duyck * 712cf3637bbSAlexander Duyck * The left-hand side may look a bit weird: tnode_child_length(tn) 713cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 714cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 715cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 716cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 717cf3637bbSAlexander Duyck * we just count them one extra time here. 718cf3637bbSAlexander Duyck * 719cf3637bbSAlexander Duyck * A clearer way to write this would be: 720cf3637bbSAlexander Duyck * 721cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 722cf3637bbSAlexander Duyck * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 723cf3637bbSAlexander Duyck * tn->full_children; 724cf3637bbSAlexander Duyck * 725cf3637bbSAlexander Duyck * new_child_length = tnode_child_length(tn) * 2; 726cf3637bbSAlexander Duyck * 727cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 728cf3637bbSAlexander Duyck * new_child_length; 729cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 730cf3637bbSAlexander Duyck * 731cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 732cf3637bbSAlexander Duyck * 733cf3637bbSAlexander Duyck * anyway, 734cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 735cf3637bbSAlexander Duyck * inflate_threshold 736cf3637bbSAlexander Duyck * 737cf3637bbSAlexander Duyck * avoid a division: 738cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 739cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 740cf3637bbSAlexander Duyck * 741cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 742cf3637bbSAlexander Duyck * 100 * (tnode_child_length(tn) - tn->empty_children + 743cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 744cf3637bbSAlexander Duyck * 745cf3637bbSAlexander Duyck * expand new_child_length: 746cf3637bbSAlexander Duyck * 100 * (tnode_child_length(tn) - tn->empty_children + 747cf3637bbSAlexander Duyck * tn->full_children) >= 748cf3637bbSAlexander Duyck * inflate_threshold * tnode_child_length(tn) * 2 749cf3637bbSAlexander Duyck * 750cf3637bbSAlexander Duyck * shorten again: 751cf3637bbSAlexander Duyck * 50 * (tn->full_children + tnode_child_length(tn) - 752cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 753cf3637bbSAlexander Duyck * tnode_child_length(tn) 754cf3637bbSAlexander Duyck * 755cf3637bbSAlexander Duyck */ 756ff181ed8SAlexander Duyck static bool should_inflate(const struct tnode *tp, const struct tnode *tn) 757f05a4819SAlexander Duyck { 758f05a4819SAlexander Duyck unsigned long used = tnode_child_length(tn); 759f05a4819SAlexander Duyck unsigned long threshold = used; 760cf3637bbSAlexander Duyck 761cf3637bbSAlexander Duyck /* Keep root node larger */ 762ff181ed8SAlexander Duyck threshold *= tp ? inflate_threshold : inflate_threshold_root; 763f05a4819SAlexander Duyck used -= tn->empty_children; 76495f60ea3SAlexander Duyck used += tn->full_children; 765cf3637bbSAlexander Duyck 76695f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 76795f60ea3SAlexander Duyck 76895f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 769cf3637bbSAlexander Duyck } 770cf3637bbSAlexander Duyck 771ff181ed8SAlexander Duyck static bool should_halve(const struct tnode *tp, const struct tnode *tn) 772f05a4819SAlexander Duyck { 773f05a4819SAlexander Duyck unsigned long used = tnode_child_length(tn); 774f05a4819SAlexander Duyck unsigned long threshold = used; 775cf3637bbSAlexander Duyck 776f05a4819SAlexander Duyck /* Keep root node larger */ 777ff181ed8SAlexander Duyck threshold *= tp ? halve_threshold : halve_threshold_root; 778f05a4819SAlexander Duyck used -= tn->empty_children; 779f05a4819SAlexander Duyck 78095f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 78195f60ea3SAlexander Duyck 78295f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 78395f60ea3SAlexander Duyck } 78495f60ea3SAlexander Duyck 78595f60ea3SAlexander Duyck static bool should_collapse(const struct tnode *tn) 78695f60ea3SAlexander Duyck { 78795f60ea3SAlexander Duyck unsigned long used = tnode_child_length(tn); 78895f60ea3SAlexander Duyck 78995f60ea3SAlexander Duyck used -= tn->empty_children; 79095f60ea3SAlexander Duyck 79195f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 79295f60ea3SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn->full_children) 79395f60ea3SAlexander Duyck used -= KEY_MAX; 79495f60ea3SAlexander Duyck 79595f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 79695f60ea3SAlexander Duyck return used < 2; 797f05a4819SAlexander Duyck } 798f05a4819SAlexander Duyck 799f05a4819SAlexander Duyck #define MAX_WORK 10 800ff181ed8SAlexander Duyck static void resize(struct trie *t, struct tnode *tn) 801f05a4819SAlexander Duyck { 80295f60ea3SAlexander Duyck struct tnode *tp = node_parent(tn); 803ff181ed8SAlexander Duyck struct tnode __rcu **cptr; 804a80e89d4SAlexander Duyck int max_work = MAX_WORK; 805f05a4819SAlexander Duyck 806f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 807f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 808f05a4819SAlexander Duyck 809ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 810ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 811ff181ed8SAlexander Duyck * thing without us interfering 812ff181ed8SAlexander Duyck */ 813ff181ed8SAlexander Duyck cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; 814ff181ed8SAlexander Duyck BUG_ON(tn != rtnl_dereference(*cptr)); 815ff181ed8SAlexander Duyck 816f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 817f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 818f05a4819SAlexander Duyck */ 819a80e89d4SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 820ff181ed8SAlexander Duyck if (inflate(t, tn)) { 821cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 822cf3637bbSAlexander Duyck this_cpu_inc(t->stats->resize_node_skipped); 823cf3637bbSAlexander Duyck #endif 824cf3637bbSAlexander Duyck break; 825cf3637bbSAlexander Duyck } 826ff181ed8SAlexander Duyck 827a80e89d4SAlexander Duyck max_work--; 828ff181ed8SAlexander Duyck tn = rtnl_dereference(*cptr); 829cf3637bbSAlexander Duyck } 830cf3637bbSAlexander Duyck 831cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 832cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 833ff181ed8SAlexander Duyck return; 834cf3637bbSAlexander Duyck 835f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 836cf3637bbSAlexander Duyck * node is above threshold. 837cf3637bbSAlexander Duyck */ 838a80e89d4SAlexander Duyck while (should_halve(tp, tn) && max_work) { 839ff181ed8SAlexander Duyck if (halve(t, tn)) { 840cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 841cf3637bbSAlexander Duyck this_cpu_inc(t->stats->resize_node_skipped); 842cf3637bbSAlexander Duyck #endif 843cf3637bbSAlexander Duyck break; 844cf3637bbSAlexander Duyck } 845cf3637bbSAlexander Duyck 846a80e89d4SAlexander Duyck max_work--; 847ff181ed8SAlexander Duyck tn = rtnl_dereference(*cptr); 848ff181ed8SAlexander Duyck } 849cf3637bbSAlexander Duyck 850cf3637bbSAlexander Duyck /* Only one child remains */ 85195f60ea3SAlexander Duyck if (should_collapse(tn)) { 85295f60ea3SAlexander Duyck collapse(t, tn); 8535405afd1SAlexander Duyck return; 8545405afd1SAlexander Duyck } 8555405afd1SAlexander Duyck 8565405afd1SAlexander Duyck /* Return if at least one deflate was run */ 8575405afd1SAlexander Duyck if (max_work != MAX_WORK) 8585405afd1SAlexander Duyck return; 8595405afd1SAlexander Duyck 8605405afd1SAlexander Duyck /* push the suffix length to the parent node */ 8615405afd1SAlexander Duyck if (tn->slen > tn->pos) { 8625405afd1SAlexander Duyck unsigned char slen = update_suffix(tn); 8635405afd1SAlexander Duyck 8645405afd1SAlexander Duyck if (tp && (slen > tp->slen)) 8655405afd1SAlexander Duyck tp->slen = slen; 866cf3637bbSAlexander Duyck } 867cf3637bbSAlexander Duyck } 868cf3637bbSAlexander Duyck 869772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 8702373ce1cSRobert Olsson via get_fa_head and dump */ 8712373ce1cSRobert Olsson 872adaf9816SAlexander Duyck static struct leaf_info *find_leaf_info(struct tnode *l, int plen) 87319baf839SRobert Olsson { 874772cb712SRobert Olsson struct hlist_head *head = &l->list; 87519baf839SRobert Olsson struct leaf_info *li; 87619baf839SRobert Olsson 877b67bfe0dSSasha Levin hlist_for_each_entry_rcu(li, head, hlist) 87819baf839SRobert Olsson if (li->plen == plen) 87919baf839SRobert Olsson return li; 88091b9a277SOlof Johansson 88119baf839SRobert Olsson return NULL; 88219baf839SRobert Olsson } 88319baf839SRobert Olsson 884adaf9816SAlexander Duyck static inline struct list_head *get_fa_head(struct tnode *l, int plen) 88519baf839SRobert Olsson { 886772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 88719baf839SRobert Olsson 88891b9a277SOlof Johansson if (!li) 88991b9a277SOlof Johansson return NULL; 89019baf839SRobert Olsson 89191b9a277SOlof Johansson return &li->falh; 89219baf839SRobert Olsson } 89319baf839SRobert Olsson 8945405afd1SAlexander Duyck static void leaf_pull_suffix(struct tnode *l) 89519baf839SRobert Olsson { 8965405afd1SAlexander Duyck struct tnode *tp = node_parent(l); 8975405afd1SAlexander Duyck 8985405afd1SAlexander Duyck while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { 8995405afd1SAlexander Duyck if (update_suffix(tp) > l->slen) 9005405afd1SAlexander Duyck break; 9015405afd1SAlexander Duyck tp = node_parent(tp); 9025405afd1SAlexander Duyck } 9035405afd1SAlexander Duyck } 9045405afd1SAlexander Duyck 9055405afd1SAlexander Duyck static void leaf_push_suffix(struct tnode *l) 9065405afd1SAlexander Duyck { 9075405afd1SAlexander Duyck struct tnode *tn = node_parent(l); 9085405afd1SAlexander Duyck 9095405afd1SAlexander Duyck /* if this is a new leaf then tn will be NULL and we can sort 9105405afd1SAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 9115405afd1SAlexander Duyck */ 9125405afd1SAlexander Duyck while (tn && (tn->slen < l->slen)) { 9135405afd1SAlexander Duyck tn->slen = l->slen; 9145405afd1SAlexander Duyck tn = node_parent(tn); 9155405afd1SAlexander Duyck } 9165405afd1SAlexander Duyck } 9175405afd1SAlexander Duyck 9185405afd1SAlexander Duyck static void remove_leaf_info(struct tnode *l, struct leaf_info *old) 9195405afd1SAlexander Duyck { 92064c62723SAlexander Duyck /* record the location of the previous list_info entry */ 92164c62723SAlexander Duyck struct hlist_node **pprev = old->hlist.pprev; 92264c62723SAlexander Duyck struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); 9235405afd1SAlexander Duyck 9245405afd1SAlexander Duyck /* remove the leaf info from the list */ 9255405afd1SAlexander Duyck hlist_del_rcu(&old->hlist); 9265405afd1SAlexander Duyck 92764c62723SAlexander Duyck /* only access li if it is pointing at the last valid hlist_node */ 92864c62723SAlexander Duyck if (hlist_empty(&l->list) || (*pprev)) 9295405afd1SAlexander Duyck return; 9305405afd1SAlexander Duyck 93164c62723SAlexander Duyck /* update the trie with the latest suffix length */ 9325405afd1SAlexander Duyck l->slen = KEYLENGTH - li->plen; 9335405afd1SAlexander Duyck leaf_pull_suffix(l); 9345405afd1SAlexander Duyck } 9355405afd1SAlexander Duyck 9365405afd1SAlexander Duyck static void insert_leaf_info(struct tnode *l, struct leaf_info *new) 9375405afd1SAlexander Duyck { 9385405afd1SAlexander Duyck struct hlist_head *head = &l->list; 93919baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 94019baf839SRobert Olsson 94191b9a277SOlof Johansson if (hlist_empty(head)) { 9422373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 94391b9a277SOlof Johansson } else { 944b67bfe0dSSasha Levin hlist_for_each_entry(li, head, hlist) { 94519baf839SRobert Olsson if (new->plen > li->plen) 94619baf839SRobert Olsson break; 94719baf839SRobert Olsson 94819baf839SRobert Olsson last = li; 94919baf839SRobert Olsson } 95019baf839SRobert Olsson if (last) 9511d023284SKen Helias hlist_add_behind_rcu(&new->hlist, &last->hlist); 95219baf839SRobert Olsson else 9532373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 95419baf839SRobert Olsson } 9555405afd1SAlexander Duyck 9565405afd1SAlexander Duyck /* if we added to the tail node then we need to update slen */ 95764c62723SAlexander Duyck if (l->slen < (KEYLENGTH - new->plen)) { 9585405afd1SAlexander Duyck l->slen = KEYLENGTH - new->plen; 9595405afd1SAlexander Duyck leaf_push_suffix(l); 9605405afd1SAlexander Duyck } 96119baf839SRobert Olsson } 96219baf839SRobert Olsson 9632373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 964adaf9816SAlexander Duyck static struct tnode *fib_find_node(struct trie *t, u32 key) 96519baf839SRobert Olsson { 966adaf9816SAlexander Duyck struct tnode *n = rcu_dereference_rtnl(t->trie); 96719baf839SRobert Olsson 968939afb06SAlexander Duyck while (n) { 969939afb06SAlexander Duyck unsigned long index = get_index(key, n); 97019baf839SRobert Olsson 971939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 972939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 973939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 974939afb06SAlexander Duyck * is the difference between the key and this value. From 975939afb06SAlexander Duyck * this we can actually derive several pieces of data. 976b3832117SAlexander Duyck * if (index & (~0ul << bits)) 977939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 978b3832117SAlexander Duyck * else 979b3832117SAlexander Duyck * we know the value is cindex 980939afb06SAlexander Duyck */ 981b3832117SAlexander Duyck if (index & (~0ul << n->bits)) 98219baf839SRobert Olsson return NULL; 983939afb06SAlexander Duyck 984939afb06SAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 985939afb06SAlexander Duyck if (IS_LEAF(n)) 986939afb06SAlexander Duyck break; 987939afb06SAlexander Duyck 98821d1f11dSAlexander Duyck n = tnode_get_child_rcu(n, index); 989939afb06SAlexander Duyck } 990939afb06SAlexander Duyck 991939afb06SAlexander Duyck return n; 99219baf839SRobert Olsson } 99319baf839SRobert Olsson 99402525368SAlexander Duyck /* Return the first fib alias matching TOS with 99502525368SAlexander Duyck * priority less than or equal to PRIO. 99602525368SAlexander Duyck */ 99702525368SAlexander Duyck static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 99802525368SAlexander Duyck { 99902525368SAlexander Duyck struct fib_alias *fa; 100002525368SAlexander Duyck 100102525368SAlexander Duyck if (!fah) 100202525368SAlexander Duyck return NULL; 100302525368SAlexander Duyck 100402525368SAlexander Duyck list_for_each_entry(fa, fah, fa_list) { 100502525368SAlexander Duyck if (fa->fa_tos > tos) 100602525368SAlexander Duyck continue; 100702525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 100802525368SAlexander Duyck return fa; 100902525368SAlexander Duyck } 101002525368SAlexander Duyck 101102525368SAlexander Duyck return NULL; 101202525368SAlexander Duyck } 101302525368SAlexander Duyck 10147b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn) 101519baf839SRobert Olsson { 101606801916SStephen Hemminger struct tnode *tp; 101719baf839SRobert Olsson 1018ff181ed8SAlexander Duyck while ((tp = node_parent(tn)) != NULL) { 1019ff181ed8SAlexander Duyck resize(t, tn); 102006801916SStephen Hemminger tn = tp; 102119baf839SRobert Olsson } 102206801916SStephen Hemminger 102319baf839SRobert Olsson /* Handle last (top) tnode */ 10247b85576dSJarek Poplawski if (IS_TNODE(tn)) 1025ff181ed8SAlexander Duyck resize(t, tn); 102619baf839SRobert Olsson } 102719baf839SRobert Olsson 10282373ce1cSRobert Olsson /* only used from updater-side */ 10292373ce1cSRobert Olsson 1030fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 103119baf839SRobert Olsson { 103219baf839SRobert Olsson struct list_head *fa_head = NULL; 1033836a0123SAlexander Duyck struct tnode *l, *n, *tp = NULL; 103419baf839SRobert Olsson struct leaf_info *li; 103519baf839SRobert Olsson 1036836a0123SAlexander Duyck li = leaf_info_new(plen); 1037836a0123SAlexander Duyck if (!li) 1038836a0123SAlexander Duyck return NULL; 1039836a0123SAlexander Duyck fa_head = &li->falh; 1040836a0123SAlexander Duyck 10410a5c0475SEric Dumazet n = rtnl_dereference(t->trie); 104219baf839SRobert Olsson 104319baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 104419baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 104519baf839SRobert Olsson * and we should just put our new leaf in that. 104619baf839SRobert Olsson * 1047836a0123SAlexander Duyck * If we hit a node with a key that does't match then we should stop 1048836a0123SAlexander Duyck * and create a new tnode to replace that node and insert ourselves 1049836a0123SAlexander Duyck * and the other node into the new tnode. 105019baf839SRobert Olsson */ 1051836a0123SAlexander Duyck while (n) { 1052836a0123SAlexander Duyck unsigned long index = get_index(key, n); 105319baf839SRobert Olsson 1054836a0123SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 1055836a0123SAlexander Duyck * checks into a single check. The prefix consists of the 1056836a0123SAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 1057836a0123SAlexander Duyck * is the difference between the key and this value. From 1058836a0123SAlexander Duyck * this we can actually derive several pieces of data. 1059836a0123SAlexander Duyck * if !(index >> bits) 1060836a0123SAlexander Duyck * we know the value is child index 1061836a0123SAlexander Duyck * else 1062836a0123SAlexander Duyck * we have a mismatch in skip bits and failed 1063836a0123SAlexander Duyck */ 1064836a0123SAlexander Duyck if (index >> n->bits) 106519baf839SRobert Olsson break; 106619baf839SRobert Olsson 1067836a0123SAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 1068836a0123SAlexander Duyck if (IS_LEAF(n)) { 1069836a0123SAlexander Duyck /* Case 1: n is a leaf, and prefixes match*/ 10705405afd1SAlexander Duyck insert_leaf_info(n, li); 1071836a0123SAlexander Duyck return fa_head; 107219baf839SRobert Olsson } 1073836a0123SAlexander Duyck 1074836a0123SAlexander Duyck tp = n; 107521d1f11dSAlexander Duyck n = tnode_get_child_rcu(n, index); 1076836a0123SAlexander Duyck } 1077836a0123SAlexander Duyck 107864c9b6fbSAlexander Duyck l = leaf_new(key); 1079836a0123SAlexander Duyck if (!l) { 1080836a0123SAlexander Duyck free_leaf_info(li); 1081fea86ad8SStephen Hemminger return NULL; 1082f835e471SRobert Olsson } 108319baf839SRobert Olsson 10845405afd1SAlexander Duyck insert_leaf_info(l, li); 108519baf839SRobert Olsson 1086836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1087836a0123SAlexander Duyck * 108819baf839SRobert Olsson * Add a new tnode here 108919baf839SRobert Olsson * first tnode need some special handling 1090836a0123SAlexander Duyck * leaves us in position for handling as case 3 109119baf839SRobert Olsson */ 109219baf839SRobert Olsson if (n) { 1093836a0123SAlexander Duyck struct tnode *tn; 1094f835e471SRobert Olsson 1095e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 1096f835e471SRobert Olsson if (!tn) { 1097f835e471SRobert Olsson free_leaf_info(li); 109837fd30f2SAlexander Duyck node_free(l); 1099fea86ad8SStephen Hemminger return NULL; 1100f835e471SRobert Olsson } 110119baf839SRobert Olsson 1102836a0123SAlexander Duyck /* initialize routes out of node */ 1103836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1104836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 110519baf839SRobert Olsson 1106836a0123SAlexander Duyck /* start adding routes into the node */ 1107836a0123SAlexander Duyck put_child_root(tp, t, key, tn); 1108836a0123SAlexander Duyck node_set_parent(n, tn); 110919baf839SRobert Olsson 1110836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1111e962f302SAlexander Duyck tp = tn; 111219baf839SRobert Olsson } 111391b9a277SOlof Johansson 1114836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1115836a0123SAlexander Duyck if (tp) { 1116836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 1117836a0123SAlexander Duyck put_child(tp, get_index(key, tp), l); 11187b85576dSJarek Poplawski trie_rebalance(t, tp); 1119836a0123SAlexander Duyck } else { 1120836a0123SAlexander Duyck rcu_assign_pointer(t->trie, l); 1121836a0123SAlexander Duyck } 1122836a0123SAlexander Duyck 112319baf839SRobert Olsson return fa_head; 112419baf839SRobert Olsson } 112519baf839SRobert Olsson 1126d562f1f8SRobert Olsson /* 1127d562f1f8SRobert Olsson * Caller must hold RTNL. 1128d562f1f8SRobert Olsson */ 112916c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 113019baf839SRobert Olsson { 113119baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 113219baf839SRobert Olsson struct fib_alias *fa, *new_fa; 113319baf839SRobert Olsson struct list_head *fa_head = NULL; 113419baf839SRobert Olsson struct fib_info *fi; 11354e902c57SThomas Graf int plen = cfg->fc_dst_len; 11364e902c57SThomas Graf u8 tos = cfg->fc_tos; 113719baf839SRobert Olsson u32 key, mask; 113819baf839SRobert Olsson int err; 1139adaf9816SAlexander Duyck struct tnode *l; 114019baf839SRobert Olsson 114119baf839SRobert Olsson if (plen > 32) 114219baf839SRobert Olsson return -EINVAL; 114319baf839SRobert Olsson 11444e902c57SThomas Graf key = ntohl(cfg->fc_dst); 114519baf839SRobert Olsson 11462dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 114719baf839SRobert Olsson 114819baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 114919baf839SRobert Olsson 115019baf839SRobert Olsson if (key & ~mask) 115119baf839SRobert Olsson return -EINVAL; 115219baf839SRobert Olsson 115319baf839SRobert Olsson key = key & mask; 115419baf839SRobert Olsson 11554e902c57SThomas Graf fi = fib_create_info(cfg); 11564e902c57SThomas Graf if (IS_ERR(fi)) { 11574e902c57SThomas Graf err = PTR_ERR(fi); 115819baf839SRobert Olsson goto err; 11594e902c57SThomas Graf } 116019baf839SRobert Olsson 116119baf839SRobert Olsson l = fib_find_node(t, key); 116219baf839SRobert Olsson fa = NULL; 116319baf839SRobert Olsson 116419baf839SRobert Olsson if (l) { 116519baf839SRobert Olsson fa_head = get_fa_head(l, plen); 116619baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 116719baf839SRobert Olsson } 116819baf839SRobert Olsson 116919baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 117019baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 117119baf839SRobert Olsson * exists or to the node before which we will insert new one. 117219baf839SRobert Olsson * 117319baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 117419baf839SRobert Olsson * insert to the head of f. 117519baf839SRobert Olsson * 117619baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 117719baf839SRobert Olsson * and we need to allocate a new one of those as well. 117819baf839SRobert Olsson */ 117919baf839SRobert Olsson 1180936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1181936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1182936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 118319baf839SRobert Olsson 118419baf839SRobert Olsson err = -EEXIST; 11854e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 118619baf839SRobert Olsson goto out; 118719baf839SRobert Olsson 1188936f6f8eSJulian Anastasov /* We have 2 goals: 1189936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1190936f6f8eSJulian Anastasov * duplicate routes 1191936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1192936f6f8eSJulian Anastasov */ 1193936f6f8eSJulian Anastasov fa_match = NULL; 1194936f6f8eSJulian Anastasov fa_first = fa; 1195936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1196936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 1197936f6f8eSJulian Anastasov if (fa->fa_tos != tos) 1198936f6f8eSJulian Anastasov break; 1199936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1200936f6f8eSJulian Anastasov break; 1201936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1202936f6f8eSJulian Anastasov fa->fa_info == fi) { 1203936f6f8eSJulian Anastasov fa_match = fa; 1204936f6f8eSJulian Anastasov break; 1205936f6f8eSJulian Anastasov } 1206936f6f8eSJulian Anastasov } 1207936f6f8eSJulian Anastasov 12084e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 120919baf839SRobert Olsson struct fib_info *fi_drop; 121019baf839SRobert Olsson u8 state; 121119baf839SRobert Olsson 1212936f6f8eSJulian Anastasov fa = fa_first; 1213936f6f8eSJulian Anastasov if (fa_match) { 1214936f6f8eSJulian Anastasov if (fa == fa_match) 1215936f6f8eSJulian Anastasov err = 0; 12166725033fSJoonwoo Park goto out; 1217936f6f8eSJulian Anastasov } 12182373ce1cSRobert Olsson err = -ENOBUFS; 1219e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 12202373ce1cSRobert Olsson if (new_fa == NULL) 12212373ce1cSRobert Olsson goto out; 122219baf839SRobert Olsson 122319baf839SRobert Olsson fi_drop = fa->fa_info; 12242373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12252373ce1cSRobert Olsson new_fa->fa_info = fi; 12264e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 122719baf839SRobert Olsson state = fa->fa_state; 1228936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 122919baf839SRobert Olsson 12302373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12312373ce1cSRobert Olsson alias_free_mem_rcu(fa); 123219baf839SRobert Olsson 123319baf839SRobert Olsson fib_release_info(fi_drop); 123419baf839SRobert Olsson if (state & FA_S_ACCESSED) 12354ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1236b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1237b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 123819baf839SRobert Olsson 123919baf839SRobert Olsson goto succeeded; 124019baf839SRobert Olsson } 124119baf839SRobert Olsson /* Error if we find a perfect match which 124219baf839SRobert Olsson * uses the same scope, type, and nexthop 124319baf839SRobert Olsson * information. 124419baf839SRobert Olsson */ 1245936f6f8eSJulian Anastasov if (fa_match) 124619baf839SRobert Olsson goto out; 1247a07f5f50SStephen Hemminger 12484e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1249936f6f8eSJulian Anastasov fa = fa_first; 125019baf839SRobert Olsson } 125119baf839SRobert Olsson err = -ENOENT; 12524e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 125319baf839SRobert Olsson goto out; 125419baf839SRobert Olsson 125519baf839SRobert Olsson err = -ENOBUFS; 1256e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 125719baf839SRobert Olsson if (new_fa == NULL) 125819baf839SRobert Olsson goto out; 125919baf839SRobert Olsson 126019baf839SRobert Olsson new_fa->fa_info = fi; 126119baf839SRobert Olsson new_fa->fa_tos = tos; 12624e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 126319baf839SRobert Olsson new_fa->fa_state = 0; 126419baf839SRobert Olsson /* 126519baf839SRobert Olsson * Insert new entry to the list. 126619baf839SRobert Olsson */ 126719baf839SRobert Olsson 1268f835e471SRobert Olsson if (!fa_head) { 1269fea86ad8SStephen Hemminger fa_head = fib_insert_node(t, key, plen); 1270fea86ad8SStephen Hemminger if (unlikely(!fa_head)) { 1271fea86ad8SStephen Hemminger err = -ENOMEM; 1272f835e471SRobert Olsson goto out_free_new_fa; 1273f835e471SRobert Olsson } 1274fea86ad8SStephen Hemminger } 127519baf839SRobert Olsson 127621d8c49eSDavid S. Miller if (!plen) 127721d8c49eSDavid S. Miller tb->tb_num_default++; 127821d8c49eSDavid S. Miller 12792373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 12802373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 128119baf839SRobert Olsson 12824ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 12834e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1284b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 128519baf839SRobert Olsson succeeded: 128619baf839SRobert Olsson return 0; 1287f835e471SRobert Olsson 1288f835e471SRobert Olsson out_free_new_fa: 1289f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 129019baf839SRobert Olsson out: 129119baf839SRobert Olsson fib_release_info(fi); 129291b9a277SOlof Johansson err: 129319baf839SRobert Olsson return err; 129419baf839SRobert Olsson } 129519baf839SRobert Olsson 12969f9e636dSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct tnode *n) 12979f9e636dSAlexander Duyck { 12989f9e636dSAlexander Duyck t_key prefix = n->key; 12999f9e636dSAlexander Duyck 13009f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 13019f9e636dSAlexander Duyck } 13029f9e636dSAlexander Duyck 1303345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 130422bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1305ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 130619baf839SRobert Olsson { 130719baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 13088274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 13098274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 13108274a97aSAlexander Duyck #endif 13119f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 13129f9e636dSAlexander Duyck struct tnode *n, *pn; 1313345e9b54SAlexander Duyck struct leaf_info *li; 13149f9e636dSAlexander Duyck t_key cindex; 131519baf839SRobert Olsson 13162373ce1cSRobert Olsson n = rcu_dereference(t->trie); 131719baf839SRobert Olsson if (!n) 1318345e9b54SAlexander Duyck return -EAGAIN; 131919baf839SRobert Olsson 132019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13218274a97aSAlexander Duyck this_cpu_inc(stats->gets); 132219baf839SRobert Olsson #endif 132319baf839SRobert Olsson 13249f9e636dSAlexander Duyck pn = n; 13259f9e636dSAlexander Duyck cindex = 0; 13269f9e636dSAlexander Duyck 13279f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 13289f9e636dSAlexander Duyck for (;;) { 13299f9e636dSAlexander Duyck unsigned long index = get_index(key, n); 13309f9e636dSAlexander Duyck 13319f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13329f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13339f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13349f9e636dSAlexander Duyck * is the difference between the key and this value. From 13359f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 1336b3832117SAlexander Duyck * if (index & (~0ul << bits)) 13379f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1338b3832117SAlexander Duyck * else 1339b3832117SAlexander Duyck * we know the value is cindex 13409f9e636dSAlexander Duyck */ 1341b3832117SAlexander Duyck if (index & (~0ul << n->bits)) 13429f9e636dSAlexander Duyck break; 13439f9e636dSAlexander Duyck 13449f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13459f9e636dSAlexander Duyck if (IS_LEAF(n)) 1346a07f5f50SStephen Hemminger goto found; 13479f9e636dSAlexander Duyck 13489f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13499f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13509f9e636dSAlexander Duyck */ 13515405afd1SAlexander Duyck if (n->slen > n->pos) { 13529f9e636dSAlexander Duyck pn = n; 13539f9e636dSAlexander Duyck cindex = index; 135419baf839SRobert Olsson } 1355a07f5f50SStephen Hemminger 135621d1f11dSAlexander Duyck n = tnode_get_child_rcu(n, index); 13579f9e636dSAlexander Duyck if (unlikely(!n)) 13589f9e636dSAlexander Duyck goto backtrace; 13599f9e636dSAlexander Duyck } 136019baf839SRobert Olsson 13619f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13629f9e636dSAlexander Duyck for (;;) { 13639f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 13649f9e636dSAlexander Duyck struct tnode __rcu **cptr = n->child; 136519baf839SRobert Olsson 13669f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13679f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13689f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13699f9e636dSAlexander Duyck */ 13705405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13719f9e636dSAlexander Duyck goto backtrace; 137219baf839SRobert Olsson 13739f9e636dSAlexander Duyck /* exit out and process leaf */ 13749f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13759f9e636dSAlexander Duyck break; 137619baf839SRobert Olsson 13779f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13789f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 13799f9e636dSAlexander Duyck * we started this traversal anyway 13809f9e636dSAlexander Duyck */ 13819f9e636dSAlexander Duyck 13829f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 13839f9e636dSAlexander Duyck backtrace: 138419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13859f9e636dSAlexander Duyck if (!n) 13868274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 138719baf839SRobert Olsson #endif 13889f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 13899f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 13909f9e636dSAlexander Duyck * up one level to see if there are any more bits to 13919f9e636dSAlexander Duyck * be stripped there. 139219baf839SRobert Olsson */ 13939f9e636dSAlexander Duyck while (!cindex) { 13949f9e636dSAlexander Duyck t_key pkey = pn->key; 139519baf839SRobert Olsson 13969f9e636dSAlexander Duyck pn = node_parent_rcu(pn); 13979f9e636dSAlexander Duyck if (unlikely(!pn)) 1398345e9b54SAlexander Duyck return -EAGAIN; 139919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14008274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 140119baf839SRobert Olsson #endif 14029f9e636dSAlexander Duyck /* Get Child's index */ 14039f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 14049f9e636dSAlexander Duyck } 14059f9e636dSAlexander Duyck 14069f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 14079f9e636dSAlexander Duyck cindex &= cindex - 1; 14089f9e636dSAlexander Duyck 14099f9e636dSAlexander Duyck /* grab pointer for next child node */ 14109f9e636dSAlexander Duyck cptr = &pn->child[cindex]; 141119baf839SRobert Olsson } 141219baf839SRobert Olsson } 14139f9e636dSAlexander Duyck 141419baf839SRobert Olsson found: 14159f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 1416345e9b54SAlexander Duyck hlist_for_each_entry_rcu(li, &n->list, hlist) { 1417345e9b54SAlexander Duyck struct fib_alias *fa; 1418345e9b54SAlexander Duyck 1419345e9b54SAlexander Duyck if ((key ^ n->key) & li->mask_plen) 1420345e9b54SAlexander Duyck continue; 1421345e9b54SAlexander Duyck 1422345e9b54SAlexander Duyck list_for_each_entry_rcu(fa, &li->falh, fa_list) { 1423345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1424345e9b54SAlexander Duyck int nhsel, err; 1425345e9b54SAlexander Duyck 1426345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1427345e9b54SAlexander Duyck continue; 1428345e9b54SAlexander Duyck if (fi->fib_dead) 1429345e9b54SAlexander Duyck continue; 1430345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1431345e9b54SAlexander Duyck continue; 1432345e9b54SAlexander Duyck fib_alias_accessed(fa); 1433345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1434345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1435345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1436345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1437345e9b54SAlexander Duyck #endif 1438345e9b54SAlexander Duyck return err; 1439345e9b54SAlexander Duyck } 1440345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1441345e9b54SAlexander Duyck continue; 1442345e9b54SAlexander Duyck for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1443345e9b54SAlexander Duyck const struct fib_nh *nh = &fi->fib_nh[nhsel]; 1444345e9b54SAlexander Duyck 1445345e9b54SAlexander Duyck if (nh->nh_flags & RTNH_F_DEAD) 1446345e9b54SAlexander Duyck continue; 1447345e9b54SAlexander Duyck if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) 1448345e9b54SAlexander Duyck continue; 1449345e9b54SAlexander Duyck 1450345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 1451345e9b54SAlexander Duyck atomic_inc(&fi->fib_clntref); 1452345e9b54SAlexander Duyck 1453345e9b54SAlexander Duyck res->prefixlen = li->plen; 1454345e9b54SAlexander Duyck res->nh_sel = nhsel; 1455345e9b54SAlexander Duyck res->type = fa->fa_type; 1456345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1457345e9b54SAlexander Duyck res->fi = fi; 1458345e9b54SAlexander Duyck res->table = tb; 1459345e9b54SAlexander Duyck res->fa_head = &li->falh; 1460345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1461345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1462345e9b54SAlexander Duyck #endif 1463345e9b54SAlexander Duyck return err; 1464345e9b54SAlexander Duyck } 1465345e9b54SAlexander Duyck } 1466345e9b54SAlexander Duyck 1467345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1468345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1469345e9b54SAlexander Duyck #endif 1470345e9b54SAlexander Duyck } 14719f9e636dSAlexander Duyck goto backtrace; 147219baf839SRobert Olsson } 14736fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 147419baf839SRobert Olsson 147519baf839SRobert Olsson /* 14769195bef7SStephen Hemminger * Remove the leaf and return parent. 147719baf839SRobert Olsson */ 1478adaf9816SAlexander Duyck static void trie_leaf_remove(struct trie *t, struct tnode *l) 14799195bef7SStephen Hemminger { 148064c9b6fbSAlexander Duyck struct tnode *tp = node_parent(l); 14819195bef7SStephen Hemminger 14829195bef7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", l); 148319baf839SRobert Olsson 148419baf839SRobert Olsson if (tp) { 1485836a0123SAlexander Duyck put_child(tp, get_index(l->key, tp), NULL); 14867b85576dSJarek Poplawski trie_rebalance(t, tp); 1487836a0123SAlexander Duyck } else { 1488a9b3cd7fSStephen Hemminger RCU_INIT_POINTER(t->trie, NULL); 1489836a0123SAlexander Duyck } 149019baf839SRobert Olsson 149137fd30f2SAlexander Duyck node_free(l); 149219baf839SRobert Olsson } 149319baf839SRobert Olsson 1494d562f1f8SRobert Olsson /* 1495d562f1f8SRobert Olsson * Caller must hold RTNL. 1496d562f1f8SRobert Olsson */ 149716c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) 149819baf839SRobert Olsson { 149919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 150019baf839SRobert Olsson u32 key, mask; 15014e902c57SThomas Graf int plen = cfg->fc_dst_len; 15024e902c57SThomas Graf u8 tos = cfg->fc_tos; 150319baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 150419baf839SRobert Olsson struct list_head *fa_head; 1505adaf9816SAlexander Duyck struct tnode *l; 150691b9a277SOlof Johansson struct leaf_info *li; 150791b9a277SOlof Johansson 150819baf839SRobert Olsson if (plen > 32) 150919baf839SRobert Olsson return -EINVAL; 151019baf839SRobert Olsson 15114e902c57SThomas Graf key = ntohl(cfg->fc_dst); 151219baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 151319baf839SRobert Olsson 151419baf839SRobert Olsson if (key & ~mask) 151519baf839SRobert Olsson return -EINVAL; 151619baf839SRobert Olsson 151719baf839SRobert Olsson key = key & mask; 151819baf839SRobert Olsson l = fib_find_node(t, key); 151919baf839SRobert Olsson 152019baf839SRobert Olsson if (!l) 152119baf839SRobert Olsson return -ESRCH; 152219baf839SRobert Olsson 1523ad5b3102SIgor Maravic li = find_leaf_info(l, plen); 1524ad5b3102SIgor Maravic 1525ad5b3102SIgor Maravic if (!li) 1526ad5b3102SIgor Maravic return -ESRCH; 1527ad5b3102SIgor Maravic 1528ad5b3102SIgor Maravic fa_head = &li->falh; 152919baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 153019baf839SRobert Olsson 153119baf839SRobert Olsson if (!fa) 153219baf839SRobert Olsson return -ESRCH; 153319baf839SRobert Olsson 15340c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 153519baf839SRobert Olsson 153619baf839SRobert Olsson fa_to_delete = NULL; 1537936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1538936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 153919baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 154019baf839SRobert Olsson 154119baf839SRobert Olsson if (fa->fa_tos != tos) 154219baf839SRobert Olsson break; 154319baf839SRobert Olsson 15444e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15454e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 154637e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 154774cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 154874cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15494e902c57SThomas Graf (!cfg->fc_protocol || 15504e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15514e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 155219baf839SRobert Olsson fa_to_delete = fa; 155319baf839SRobert Olsson break; 155419baf839SRobert Olsson } 155519baf839SRobert Olsson } 155619baf839SRobert Olsson 155791b9a277SOlof Johansson if (!fa_to_delete) 155891b9a277SOlof Johansson return -ESRCH; 155919baf839SRobert Olsson 156019baf839SRobert Olsson fa = fa_to_delete; 15614e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1562b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 156319baf839SRobert Olsson 15642373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 156519baf839SRobert Olsson 156621d8c49eSDavid S. Miller if (!plen) 156721d8c49eSDavid S. Miller tb->tb_num_default--; 156821d8c49eSDavid S. Miller 156919baf839SRobert Olsson if (list_empty(fa_head)) { 15705405afd1SAlexander Duyck remove_leaf_info(l, li); 157119baf839SRobert Olsson free_leaf_info(li); 15722373ce1cSRobert Olsson } 157319baf839SRobert Olsson 157419baf839SRobert Olsson if (hlist_empty(&l->list)) 15759195bef7SStephen Hemminger trie_leaf_remove(t, l); 157619baf839SRobert Olsson 157719baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 15784ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 157919baf839SRobert Olsson 15802373ce1cSRobert Olsson fib_release_info(fa->fa_info); 15812373ce1cSRobert Olsson alias_free_mem_rcu(fa); 158219baf839SRobert Olsson return 0; 158319baf839SRobert Olsson } 158419baf839SRobert Olsson 1585ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head) 158619baf839SRobert Olsson { 158719baf839SRobert Olsson struct fib_alias *fa, *fa_node; 158819baf839SRobert Olsson int found = 0; 158919baf839SRobert Olsson 159019baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 159119baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 159219baf839SRobert Olsson 159319baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 15942373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 15952373ce1cSRobert Olsson fib_release_info(fa->fa_info); 15962373ce1cSRobert Olsson alias_free_mem_rcu(fa); 159719baf839SRobert Olsson found++; 159819baf839SRobert Olsson } 159919baf839SRobert Olsson } 160019baf839SRobert Olsson return found; 160119baf839SRobert Olsson } 160219baf839SRobert Olsson 1603adaf9816SAlexander Duyck static int trie_flush_leaf(struct tnode *l) 160419baf839SRobert Olsson { 160519baf839SRobert Olsson int found = 0; 160619baf839SRobert Olsson struct hlist_head *lih = &l->list; 1607b67bfe0dSSasha Levin struct hlist_node *tmp; 160819baf839SRobert Olsson struct leaf_info *li = NULL; 160964c62723SAlexander Duyck unsigned char plen = KEYLENGTH; 161019baf839SRobert Olsson 1611b67bfe0dSSasha Levin hlist_for_each_entry_safe(li, tmp, lih, hlist) { 1612ef3660ceSStephen Hemminger found += trie_flush_list(&li->falh); 161319baf839SRobert Olsson 161419baf839SRobert Olsson if (list_empty(&li->falh)) { 16152373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 161619baf839SRobert Olsson free_leaf_info(li); 161764c62723SAlexander Duyck continue; 161819baf839SRobert Olsson } 161964c62723SAlexander Duyck 162064c62723SAlexander Duyck plen = li->plen; 162119baf839SRobert Olsson } 162264c62723SAlexander Duyck 162364c62723SAlexander Duyck l->slen = KEYLENGTH - plen; 162464c62723SAlexander Duyck 162519baf839SRobert Olsson return found; 162619baf839SRobert Olsson } 162719baf839SRobert Olsson 162882cfbb00SStephen Hemminger /* 162982cfbb00SStephen Hemminger * Scan for the next right leaf starting at node p->child[idx] 163082cfbb00SStephen Hemminger * Since we have back pointer, no recursion necessary. 163182cfbb00SStephen Hemminger */ 1632adaf9816SAlexander Duyck static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) 163319baf839SRobert Olsson { 163482cfbb00SStephen Hemminger do { 163598293e8dSAlexander Duyck unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0; 163619baf839SRobert Olsson 163798293e8dSAlexander Duyck while (idx < tnode_child_length(p)) { 163882cfbb00SStephen Hemminger c = tnode_get_child_rcu(p, idx++); 16392373ce1cSRobert Olsson if (!c) 164091b9a277SOlof Johansson continue; 164119baf839SRobert Olsson 1642aab515d7SEric Dumazet if (IS_LEAF(c)) 1643adaf9816SAlexander Duyck return c; 164482cfbb00SStephen Hemminger 164582cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 1646adaf9816SAlexander Duyck p = c; 164782cfbb00SStephen Hemminger idx = 0; 164819baf839SRobert Olsson } 164982cfbb00SStephen Hemminger 165082cfbb00SStephen Hemminger /* Node empty, walk back up to parent */ 1651adaf9816SAlexander Duyck c = p; 165282cfbb00SStephen Hemminger } while ((p = node_parent_rcu(c)) != NULL); 165382cfbb00SStephen Hemminger 165482cfbb00SStephen Hemminger return NULL; /* Root of trie */ 165582cfbb00SStephen Hemminger } 165682cfbb00SStephen Hemminger 1657adaf9816SAlexander Duyck static struct tnode *trie_firstleaf(struct trie *t) 165882cfbb00SStephen Hemminger { 1659adaf9816SAlexander Duyck struct tnode *n = rcu_dereference_rtnl(t->trie); 166082cfbb00SStephen Hemminger 166182cfbb00SStephen Hemminger if (!n) 166282cfbb00SStephen Hemminger return NULL; 166382cfbb00SStephen Hemminger 166482cfbb00SStephen Hemminger if (IS_LEAF(n)) /* trie is just a leaf */ 1665adaf9816SAlexander Duyck return n; 166682cfbb00SStephen Hemminger 166782cfbb00SStephen Hemminger return leaf_walk_rcu(n, NULL); 166882cfbb00SStephen Hemminger } 166982cfbb00SStephen Hemminger 1670adaf9816SAlexander Duyck static struct tnode *trie_nextleaf(struct tnode *l) 167182cfbb00SStephen Hemminger { 1672adaf9816SAlexander Duyck struct tnode *p = node_parent_rcu(l); 167382cfbb00SStephen Hemminger 167482cfbb00SStephen Hemminger if (!p) 167582cfbb00SStephen Hemminger return NULL; /* trie with just one leaf */ 167682cfbb00SStephen Hemminger 1677adaf9816SAlexander Duyck return leaf_walk_rcu(p, l); 167819baf839SRobert Olsson } 167919baf839SRobert Olsson 1680adaf9816SAlexander Duyck static struct tnode *trie_leafindex(struct trie *t, int index) 168171d67e66SStephen Hemminger { 1682adaf9816SAlexander Duyck struct tnode *l = trie_firstleaf(t); 168371d67e66SStephen Hemminger 1684ec28cf73SStephen Hemminger while (l && index-- > 0) 168571d67e66SStephen Hemminger l = trie_nextleaf(l); 1686ec28cf73SStephen Hemminger 168771d67e66SStephen Hemminger return l; 168871d67e66SStephen Hemminger } 168971d67e66SStephen Hemminger 169071d67e66SStephen Hemminger 1691d562f1f8SRobert Olsson /* 1692d562f1f8SRobert Olsson * Caller must hold RTNL. 1693d562f1f8SRobert Olsson */ 169416c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb) 169519baf839SRobert Olsson { 169619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 1697adaf9816SAlexander Duyck struct tnode *l, *ll = NULL; 169882cfbb00SStephen Hemminger int found = 0; 169919baf839SRobert Olsson 170082cfbb00SStephen Hemminger for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { 1701ef3660ceSStephen Hemminger found += trie_flush_leaf(l); 170219baf839SRobert Olsson 170364c62723SAlexander Duyck if (ll) { 170464c62723SAlexander Duyck if (hlist_empty(&ll->list)) 17059195bef7SStephen Hemminger trie_leaf_remove(t, ll); 170664c62723SAlexander Duyck else 170764c62723SAlexander Duyck leaf_pull_suffix(ll); 170864c62723SAlexander Duyck } 170964c62723SAlexander Duyck 171019baf839SRobert Olsson ll = l; 171119baf839SRobert Olsson } 171219baf839SRobert Olsson 171364c62723SAlexander Duyck if (ll) { 171464c62723SAlexander Duyck if (hlist_empty(&ll->list)) 17159195bef7SStephen Hemminger trie_leaf_remove(t, ll); 171664c62723SAlexander Duyck else 171764c62723SAlexander Duyck leaf_pull_suffix(ll); 171864c62723SAlexander Duyck } 171919baf839SRobert Olsson 17200c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 172119baf839SRobert Olsson return found; 172219baf839SRobert Olsson } 172319baf839SRobert Olsson 17244aa2c466SPavel Emelyanov void fib_free_table(struct fib_table *tb) 17254aa2c466SPavel Emelyanov { 17268274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 17278274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 17288274a97aSAlexander Duyck 17298274a97aSAlexander Duyck free_percpu(t->stats); 17308274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 17314aa2c466SPavel Emelyanov kfree(tb); 17324aa2c466SPavel Emelyanov } 17334aa2c466SPavel Emelyanov 1734a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1735a07f5f50SStephen Hemminger struct fib_table *tb, 173619baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 173719baf839SRobert Olsson { 173819baf839SRobert Olsson int i, s_i; 173919baf839SRobert Olsson struct fib_alias *fa; 174032ab5f80SAl Viro __be32 xkey = htonl(key); 174119baf839SRobert Olsson 174271d67e66SStephen Hemminger s_i = cb->args[5]; 174319baf839SRobert Olsson i = 0; 174419baf839SRobert Olsson 17452373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 17462373ce1cSRobert Olsson 17472373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 174819baf839SRobert Olsson if (i < s_i) { 174919baf839SRobert Olsson i++; 175019baf839SRobert Olsson continue; 175119baf839SRobert Olsson } 175219baf839SRobert Olsson 175315e47304SEric W. Biederman if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 175419baf839SRobert Olsson cb->nlh->nlmsg_seq, 175519baf839SRobert Olsson RTM_NEWROUTE, 175619baf839SRobert Olsson tb->tb_id, 175719baf839SRobert Olsson fa->fa_type, 1758be403ea1SThomas Graf xkey, 175919baf839SRobert Olsson plen, 176019baf839SRobert Olsson fa->fa_tos, 176164347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 176271d67e66SStephen Hemminger cb->args[5] = i; 176319baf839SRobert Olsson return -1; 176419baf839SRobert Olsson } 176519baf839SRobert Olsson i++; 176619baf839SRobert Olsson } 176771d67e66SStephen Hemminger cb->args[5] = i; 176819baf839SRobert Olsson return skb->len; 176919baf839SRobert Olsson } 177019baf839SRobert Olsson 1771adaf9816SAlexander Duyck static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, 1772a07f5f50SStephen Hemminger struct sk_buff *skb, struct netlink_callback *cb) 177319baf839SRobert Olsson { 1774a88ee229SStephen Hemminger struct leaf_info *li; 1775a88ee229SStephen Hemminger int i, s_i; 177691b9a277SOlof Johansson 177771d67e66SStephen Hemminger s_i = cb->args[4]; 1778a88ee229SStephen Hemminger i = 0; 177919baf839SRobert Olsson 1780a88ee229SStephen Hemminger /* rcu_read_lock is hold by caller */ 1781b67bfe0dSSasha Levin hlist_for_each_entry_rcu(li, &l->list, hlist) { 1782a88ee229SStephen Hemminger if (i < s_i) { 1783a88ee229SStephen Hemminger i++; 178419baf839SRobert Olsson continue; 1785a88ee229SStephen Hemminger } 178619baf839SRobert Olsson 1787a88ee229SStephen Hemminger if (i > s_i) 178871d67e66SStephen Hemminger cb->args[5] = 0; 178919baf839SRobert Olsson 1790a88ee229SStephen Hemminger if (list_empty(&li->falh)) 179119baf839SRobert Olsson continue; 179219baf839SRobert Olsson 1793a88ee229SStephen Hemminger if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { 179471d67e66SStephen Hemminger cb->args[4] = i; 179519baf839SRobert Olsson return -1; 179619baf839SRobert Olsson } 1797a88ee229SStephen Hemminger i++; 179819baf839SRobert Olsson } 1799a88ee229SStephen Hemminger 180071d67e66SStephen Hemminger cb->args[4] = i; 180119baf839SRobert Olsson return skb->len; 180219baf839SRobert Olsson } 180319baf839SRobert Olsson 180416c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1805a07f5f50SStephen Hemminger struct netlink_callback *cb) 180619baf839SRobert Olsson { 1807adaf9816SAlexander Duyck struct tnode *l; 180819baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 1809d5ce8a0eSStephen Hemminger t_key key = cb->args[2]; 181071d67e66SStephen Hemminger int count = cb->args[3]; 181119baf839SRobert Olsson 18122373ce1cSRobert Olsson rcu_read_lock(); 1813d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1814d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1815d5ce8a0eSStephen Hemminger */ 181671d67e66SStephen Hemminger if (count == 0) 1817d5ce8a0eSStephen Hemminger l = trie_firstleaf(t); 1818d5ce8a0eSStephen Hemminger else { 181971d67e66SStephen Hemminger /* Normally, continue from last key, but if that is missing 182071d67e66SStephen Hemminger * fallback to using slow rescan 1821d5ce8a0eSStephen Hemminger */ 182271d67e66SStephen Hemminger l = fib_find_node(t, key); 182371d67e66SStephen Hemminger if (!l) 182471d67e66SStephen Hemminger l = trie_leafindex(t, count); 182519baf839SRobert Olsson } 1826a88ee229SStephen Hemminger 1827d5ce8a0eSStephen Hemminger while (l) { 1828d5ce8a0eSStephen Hemminger cb->args[2] = l->key; 1829a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 183071d67e66SStephen Hemminger cb->args[3] = count; 18312373ce1cSRobert Olsson rcu_read_unlock(); 183219baf839SRobert Olsson return -1; 183319baf839SRobert Olsson } 1834d5ce8a0eSStephen Hemminger 183571d67e66SStephen Hemminger ++count; 1836d5ce8a0eSStephen Hemminger l = trie_nextleaf(l); 183771d67e66SStephen Hemminger memset(&cb->args[4], 0, 183871d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 1839a88ee229SStephen Hemminger } 184071d67e66SStephen Hemminger cb->args[3] = count; 1841a88ee229SStephen Hemminger rcu_read_unlock(); 1842a88ee229SStephen Hemminger 1843a88ee229SStephen Hemminger return skb->len; 1844a88ee229SStephen Hemminger } 184519baf839SRobert Olsson 18465348ba85SDavid S. Miller void __init fib_trie_init(void) 18477f9b8052SStephen Hemminger { 1848a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1849a07f5f50SStephen Hemminger sizeof(struct fib_alias), 1850bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 1851bc3c8c1eSStephen Hemminger 1852bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 1853adaf9816SAlexander Duyck max(sizeof(struct tnode), 1854bc3c8c1eSStephen Hemminger sizeof(struct leaf_info)), 1855bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 18567f9b8052SStephen Hemminger } 185719baf839SRobert Olsson 18587f9b8052SStephen Hemminger 18595348ba85SDavid S. Miller struct fib_table *fib_trie_table(u32 id) 186019baf839SRobert Olsson { 186119baf839SRobert Olsson struct fib_table *tb; 186219baf839SRobert Olsson struct trie *t; 186319baf839SRobert Olsson 186419baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 186519baf839SRobert Olsson GFP_KERNEL); 186619baf839SRobert Olsson if (tb == NULL) 186719baf839SRobert Olsson return NULL; 186819baf839SRobert Olsson 186919baf839SRobert Olsson tb->tb_id = id; 1870971b893eSDenis V. Lunev tb->tb_default = -1; 187121d8c49eSDavid S. Miller tb->tb_num_default = 0; 187219baf839SRobert Olsson 187319baf839SRobert Olsson t = (struct trie *) tb->tb_data; 18748274a97aSAlexander Duyck RCU_INIT_POINTER(t->trie, NULL); 18758274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 18768274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 18778274a97aSAlexander Duyck if (!t->stats) { 18788274a97aSAlexander Duyck kfree(tb); 18798274a97aSAlexander Duyck tb = NULL; 18808274a97aSAlexander Duyck } 18818274a97aSAlexander Duyck #endif 188219baf839SRobert Olsson 188319baf839SRobert Olsson return tb; 188419baf839SRobert Olsson } 188519baf839SRobert Olsson 1886cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 1887cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 1888cb7b593cSStephen Hemminger struct fib_trie_iter { 18891c340b2fSDenis V. Lunev struct seq_net_private p; 18903d3b2d25SStephen Hemminger struct fib_table *tb; 1891cb7b593cSStephen Hemminger struct tnode *tnode; 1892a034ee3cSEric Dumazet unsigned int index; 1893a034ee3cSEric Dumazet unsigned int depth; 1894cb7b593cSStephen Hemminger }; 189519baf839SRobert Olsson 1896adaf9816SAlexander Duyck static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) 189719baf839SRobert Olsson { 189898293e8dSAlexander Duyck unsigned long cindex = iter->index; 1899cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 1900cb7b593cSStephen Hemminger struct tnode *p; 190119baf839SRobert Olsson 19026640e697SEric W. Biederman /* A single entry routing table */ 19036640e697SEric W. Biederman if (!tn) 19046640e697SEric W. Biederman return NULL; 19056640e697SEric W. Biederman 1906cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 1907cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 1908cb7b593cSStephen Hemminger rescan: 190998293e8dSAlexander Duyck while (cindex < tnode_child_length(tn)) { 1910adaf9816SAlexander Duyck struct tnode *n = tnode_get_child_rcu(tn, cindex); 191119baf839SRobert Olsson 1912cb7b593cSStephen Hemminger if (n) { 191319baf839SRobert Olsson if (IS_LEAF(n)) { 1914cb7b593cSStephen Hemminger iter->tnode = tn; 1915cb7b593cSStephen Hemminger iter->index = cindex + 1; 191691b9a277SOlof Johansson } else { 1917cb7b593cSStephen Hemminger /* push down one level */ 1918adaf9816SAlexander Duyck iter->tnode = n; 1919cb7b593cSStephen Hemminger iter->index = 0; 1920cb7b593cSStephen Hemminger ++iter->depth; 192119baf839SRobert Olsson } 1922cb7b593cSStephen Hemminger return n; 192319baf839SRobert Olsson } 192419baf839SRobert Olsson 1925cb7b593cSStephen Hemminger ++cindex; 1926cb7b593cSStephen Hemminger } 1927cb7b593cSStephen Hemminger 1928cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 1929adaf9816SAlexander Duyck p = node_parent_rcu(tn); 1930cb7b593cSStephen Hemminger if (p) { 1931e9b44019SAlexander Duyck cindex = get_index(tn->key, p) + 1; 1932cb7b593cSStephen Hemminger tn = p; 1933cb7b593cSStephen Hemminger --iter->depth; 1934cb7b593cSStephen Hemminger goto rescan; 1935cb7b593cSStephen Hemminger } 1936cb7b593cSStephen Hemminger 1937cb7b593cSStephen Hemminger /* got root? */ 1938cb7b593cSStephen Hemminger return NULL; 1939cb7b593cSStephen Hemminger } 1940cb7b593cSStephen Hemminger 1941adaf9816SAlexander Duyck static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, 1942cb7b593cSStephen Hemminger struct trie *t) 1943cb7b593cSStephen Hemminger { 1944adaf9816SAlexander Duyck struct tnode *n; 19455ddf0eb2SRobert Olsson 19465ddf0eb2SRobert Olsson if (!t) 19475ddf0eb2SRobert Olsson return NULL; 19485ddf0eb2SRobert Olsson 19495ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 19503d3b2d25SStephen Hemminger if (!n) 19515ddf0eb2SRobert Olsson return NULL; 1952cb7b593cSStephen Hemminger 19536640e697SEric W. Biederman if (IS_TNODE(n)) { 1954adaf9816SAlexander Duyck iter->tnode = n; 1955cb7b593cSStephen Hemminger iter->index = 0; 19561d25cd6cSRobert Olsson iter->depth = 1; 19576640e697SEric W. Biederman } else { 19586640e697SEric W. Biederman iter->tnode = NULL; 19596640e697SEric W. Biederman iter->index = 0; 19606640e697SEric W. Biederman iter->depth = 0; 19616640e697SEric W. Biederman } 19623d3b2d25SStephen Hemminger 1963cb7b593cSStephen Hemminger return n; 1964cb7b593cSStephen Hemminger } 1965cb7b593cSStephen Hemminger 1966cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 196719baf839SRobert Olsson { 1968adaf9816SAlexander Duyck struct tnode *n; 1969cb7b593cSStephen Hemminger struct fib_trie_iter iter; 1970cb7b593cSStephen Hemminger 1971cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 197219baf839SRobert Olsson 19732373ce1cSRobert Olsson rcu_read_lock(); 19743d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 1975cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 197693672292SStephen Hemminger struct leaf_info *li; 197793672292SStephen Hemminger 197819baf839SRobert Olsson s->leaves++; 1979cb7b593cSStephen Hemminger s->totdepth += iter.depth; 1980cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 1981cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 198293672292SStephen Hemminger 1983adaf9816SAlexander Duyck hlist_for_each_entry_rcu(li, &n->list, hlist) 198493672292SStephen Hemminger ++s->prefixes; 198591b9a277SOlof Johansson } else { 198619baf839SRobert Olsson s->tnodes++; 1987adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 1988adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 198930cfe7c9SAlexander Duyck s->nullpointers += n->empty_children; 199019baf839SRobert Olsson } 199198293e8dSAlexander Duyck } 19922373ce1cSRobert Olsson rcu_read_unlock(); 199319baf839SRobert Olsson } 199419baf839SRobert Olsson 199519baf839SRobert Olsson /* 199619baf839SRobert Olsson * This outputs /proc/net/fib_triestats 199719baf839SRobert Olsson */ 1998cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 199919baf839SRobert Olsson { 2000a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 200119baf839SRobert Olsson 200219baf839SRobert Olsson if (stat->leaves) 200319baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 200419baf839SRobert Olsson else 200519baf839SRobert Olsson avdepth = 0; 200619baf839SRobert Olsson 2007a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2008a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2009cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2010cb7b593cSStephen Hemminger 2011cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2012adaf9816SAlexander Duyck bytes = sizeof(struct tnode) * stat->leaves; 201393672292SStephen Hemminger 201493672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 201593672292SStephen Hemminger bytes += sizeof(struct leaf_info) * stat->prefixes; 201693672292SStephen Hemminger 2017187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 201819baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 201919baf839SRobert Olsson 202006ef921dSRobert Olsson max = MAX_STAT_DEPTH; 202106ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 202219baf839SRobert Olsson max--; 202319baf839SRobert Olsson 2024cb7b593cSStephen Hemminger pointers = 0; 2025f585a991SJerry Snitselaar for (i = 1; i < max; i++) 202619baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2027187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 202819baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 202919baf839SRobert Olsson } 2030cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2031187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2032cb7b593cSStephen Hemminger 2033adaf9816SAlexander Duyck bytes += sizeof(struct tnode *) * pointers; 2034187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2035187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 203666a2f7fdSStephen Hemminger } 203719baf839SRobert Olsson 203819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 203966a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 20408274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 204166a2f7fdSStephen Hemminger { 20428274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 20438274a97aSAlexander Duyck int cpu; 20448274a97aSAlexander Duyck 20458274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 20468274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 20478274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 20488274a97aSAlexander Duyck 20498274a97aSAlexander Duyck s.gets += pcpu->gets; 20508274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 20518274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 20528274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 20538274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 20548274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 20558274a97aSAlexander Duyck } 20568274a97aSAlexander Duyck 205766a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 20588274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 20598274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2060a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 20618274a97aSAlexander Duyck s.semantic_match_passed); 20628274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 20638274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 20648274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 206519baf839SRobert Olsson } 206666a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 206766a2f7fdSStephen Hemminger 20683d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2069d717a9a6SStephen Hemminger { 20703d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 20713d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 20723d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 20733d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 20743d3b2d25SStephen Hemminger else 20753d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2076d717a9a6SStephen Hemminger } 207719baf839SRobert Olsson 20783d3b2d25SStephen Hemminger 207919baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 208019baf839SRobert Olsson { 20811c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 20823d3b2d25SStephen Hemminger unsigned int h; 2083877a9bffSEric W. Biederman 2084d717a9a6SStephen Hemminger seq_printf(seq, 2085a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2086a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 2087adaf9816SAlexander Duyck sizeof(struct tnode), sizeof(struct tnode)); 208819baf839SRobert Olsson 20893d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 20903d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 20913d3b2d25SStephen Hemminger struct fib_table *tb; 2092cb7b593cSStephen Hemminger 2093b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 20943d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 20953d3b2d25SStephen Hemminger struct trie_stat stat; 20963d3b2d25SStephen Hemminger 20973d3b2d25SStephen Hemminger if (!t) 20983d3b2d25SStephen Hemminger continue; 20993d3b2d25SStephen Hemminger 21003d3b2d25SStephen Hemminger fib_table_print(seq, tb); 21013d3b2d25SStephen Hemminger 21023d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 21033d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 21043d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 21058274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 21063d3b2d25SStephen Hemminger #endif 21073d3b2d25SStephen Hemminger } 21083d3b2d25SStephen Hemminger } 2109cb7b593cSStephen Hemminger 211019baf839SRobert Olsson return 0; 211119baf839SRobert Olsson } 211219baf839SRobert Olsson 211319baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 211419baf839SRobert Olsson { 2115de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 21161c340b2fSDenis V. Lunev } 21171c340b2fSDenis V. Lunev 21189a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 211919baf839SRobert Olsson .owner = THIS_MODULE, 212019baf839SRobert Olsson .open = fib_triestat_seq_open, 212119baf839SRobert Olsson .read = seq_read, 212219baf839SRobert Olsson .llseek = seq_lseek, 2123b6fcbdb4SPavel Emelyanov .release = single_release_net, 212419baf839SRobert Olsson }; 212519baf839SRobert Olsson 2126adaf9816SAlexander Duyck static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 212719baf839SRobert Olsson { 21281218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 21291218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2130cb7b593cSStephen Hemminger loff_t idx = 0; 21313d3b2d25SStephen Hemminger unsigned int h; 21323d3b2d25SStephen Hemminger 21333d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 21343d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 21353d3b2d25SStephen Hemminger struct fib_table *tb; 21363d3b2d25SStephen Hemminger 2137b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 2138adaf9816SAlexander Duyck struct tnode *n; 2139cb7b593cSStephen Hemminger 21403d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 21413d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 21423d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 21433d3b2d25SStephen Hemminger if (pos == idx++) { 21443d3b2d25SStephen Hemminger iter->tb = tb; 2145cb7b593cSStephen Hemminger return n; 214619baf839SRobert Olsson } 21473d3b2d25SStephen Hemminger } 21483d3b2d25SStephen Hemminger } 214919baf839SRobert Olsson 215019baf839SRobert Olsson return NULL; 215119baf839SRobert Olsson } 215219baf839SRobert Olsson 215319baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2154c95aaf9aSStephen Hemminger __acquires(RCU) 215519baf839SRobert Olsson { 2156cb7b593cSStephen Hemminger rcu_read_lock(); 21571218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 215819baf839SRobert Olsson } 215919baf839SRobert Olsson 216019baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 216119baf839SRobert Olsson { 2162cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 21631218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 21643d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 21653d3b2d25SStephen Hemminger struct hlist_node *tb_node; 21663d3b2d25SStephen Hemminger unsigned int h; 2167adaf9816SAlexander Duyck struct tnode *n; 2168cb7b593cSStephen Hemminger 216919baf839SRobert Olsson ++*pos; 21703d3b2d25SStephen Hemminger /* next node in same table */ 21713d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 21723d3b2d25SStephen Hemminger if (n) 21733d3b2d25SStephen Hemminger return n; 217491b9a277SOlof Johansson 21753d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 21763d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 21770a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 21783d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 21793d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 21803d3b2d25SStephen Hemminger if (n) 21813d3b2d25SStephen Hemminger goto found; 21823d3b2d25SStephen Hemminger } 2183cb7b593cSStephen Hemminger 21843d3b2d25SStephen Hemminger /* new hash chain */ 21853d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 21863d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2187b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 21883d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 21893d3b2d25SStephen Hemminger if (n) 21903d3b2d25SStephen Hemminger goto found; 21913d3b2d25SStephen Hemminger } 21923d3b2d25SStephen Hemminger } 2193cb7b593cSStephen Hemminger return NULL; 21943d3b2d25SStephen Hemminger 21953d3b2d25SStephen Hemminger found: 21963d3b2d25SStephen Hemminger iter->tb = tb; 21973d3b2d25SStephen Hemminger return n; 219819baf839SRobert Olsson } 219919baf839SRobert Olsson 220019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2201c95aaf9aSStephen Hemminger __releases(RCU) 220219baf839SRobert Olsson { 2203cb7b593cSStephen Hemminger rcu_read_unlock(); 220419baf839SRobert Olsson } 220519baf839SRobert Olsson 2206cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2207cb7b593cSStephen Hemminger { 2208a034ee3cSEric Dumazet while (n-- > 0) 2209a034ee3cSEric Dumazet seq_puts(seq, " "); 2210cb7b593cSStephen Hemminger } 221119baf839SRobert Olsson 221228d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2213cb7b593cSStephen Hemminger { 2214cb7b593cSStephen Hemminger switch (s) { 2215cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2216cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2217cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2218cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2219cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2220cb7b593cSStephen Hemminger default: 222128d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2222cb7b593cSStephen Hemminger return buf; 2223cb7b593cSStephen Hemminger } 2224cb7b593cSStephen Hemminger } 2225cb7b593cSStephen Hemminger 222636cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2227cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2228cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2229cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2230cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2231cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2232cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2233cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2234cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2235cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2236cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2237cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2238cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2239cb7b593cSStephen Hemminger }; 2240cb7b593cSStephen Hemminger 2241a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2242cb7b593cSStephen Hemminger { 2243cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2244cb7b593cSStephen Hemminger return rtn_type_names[t]; 224528d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2246cb7b593cSStephen Hemminger return buf; 2247cb7b593cSStephen Hemminger } 2248cb7b593cSStephen Hemminger 2249cb7b593cSStephen Hemminger /* Pretty print the trie */ 225019baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 225119baf839SRobert Olsson { 2252cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2253adaf9816SAlexander Duyck struct tnode *n = v; 225419baf839SRobert Olsson 22553d3b2d25SStephen Hemminger if (!node_parent_rcu(n)) 22563d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2257095b8501SRobert Olsson 2258095b8501SRobert Olsson if (IS_TNODE(n)) { 2259adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2260095b8501SRobert Olsson 22611d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2262e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2263e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 2264e9b44019SAlexander Duyck n->full_children, n->empty_children); 2265cb7b593cSStephen Hemminger } else { 22661328042eSStephen Hemminger struct leaf_info *li; 2267adaf9816SAlexander Duyck __be32 val = htonl(n->key); 2268cb7b593cSStephen Hemminger 2269cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2270673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 227128d36e37SEric Dumazet 2272adaf9816SAlexander Duyck hlist_for_each_entry_rcu(li, &n->list, hlist) { 2273cb7b593cSStephen Hemminger struct fib_alias *fa; 227428d36e37SEric Dumazet 2275cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 227628d36e37SEric Dumazet char buf1[32], buf2[32]; 227728d36e37SEric Dumazet 2278cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 22791328042eSStephen Hemminger seq_printf(seq, " /%d %s %s", li->plen, 228028d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 228137e826c5SDavid S. Miller fa->fa_info->fib_scope), 228228d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 228328d36e37SEric Dumazet fa->fa_type)); 2284cb7b593cSStephen Hemminger if (fa->fa_tos) 2285b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2286cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2287cb7b593cSStephen Hemminger } 2288cb7b593cSStephen Hemminger } 2289cb7b593cSStephen Hemminger } 229019baf839SRobert Olsson 229119baf839SRobert Olsson return 0; 229219baf839SRobert Olsson } 229319baf839SRobert Olsson 2294f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 229519baf839SRobert Olsson .start = fib_trie_seq_start, 229619baf839SRobert Olsson .next = fib_trie_seq_next, 229719baf839SRobert Olsson .stop = fib_trie_seq_stop, 229819baf839SRobert Olsson .show = fib_trie_seq_show, 229919baf839SRobert Olsson }; 230019baf839SRobert Olsson 230119baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 230219baf839SRobert Olsson { 23031c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2304cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 230519baf839SRobert Olsson } 230619baf839SRobert Olsson 23079a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 230819baf839SRobert Olsson .owner = THIS_MODULE, 230919baf839SRobert Olsson .open = fib_trie_seq_open, 231019baf839SRobert Olsson .read = seq_read, 231119baf839SRobert Olsson .llseek = seq_lseek, 23121c340b2fSDenis V. Lunev .release = seq_release_net, 231319baf839SRobert Olsson }; 231419baf839SRobert Olsson 23158315f5d8SStephen Hemminger struct fib_route_iter { 23168315f5d8SStephen Hemminger struct seq_net_private p; 23178315f5d8SStephen Hemminger struct trie *main_trie; 23188315f5d8SStephen Hemminger loff_t pos; 23198315f5d8SStephen Hemminger t_key key; 23208315f5d8SStephen Hemminger }; 23218315f5d8SStephen Hemminger 2322adaf9816SAlexander Duyck static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) 23238315f5d8SStephen Hemminger { 2324adaf9816SAlexander Duyck struct tnode *l = NULL; 23258315f5d8SStephen Hemminger struct trie *t = iter->main_trie; 23268315f5d8SStephen Hemminger 23278315f5d8SStephen Hemminger /* use cache location of last found key */ 23288315f5d8SStephen Hemminger if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) 23298315f5d8SStephen Hemminger pos -= iter->pos; 23308315f5d8SStephen Hemminger else { 23318315f5d8SStephen Hemminger iter->pos = 0; 23328315f5d8SStephen Hemminger l = trie_firstleaf(t); 23338315f5d8SStephen Hemminger } 23348315f5d8SStephen Hemminger 23358315f5d8SStephen Hemminger while (l && pos-- > 0) { 23368315f5d8SStephen Hemminger iter->pos++; 23378315f5d8SStephen Hemminger l = trie_nextleaf(l); 23388315f5d8SStephen Hemminger } 23398315f5d8SStephen Hemminger 23408315f5d8SStephen Hemminger if (l) 23418315f5d8SStephen Hemminger iter->key = pos; /* remember it */ 23428315f5d8SStephen Hemminger else 23438315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 23448315f5d8SStephen Hemminger 23458315f5d8SStephen Hemminger return l; 23468315f5d8SStephen Hemminger } 23478315f5d8SStephen Hemminger 23488315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 23498315f5d8SStephen Hemminger __acquires(RCU) 23508315f5d8SStephen Hemminger { 23518315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 23528315f5d8SStephen Hemminger struct fib_table *tb; 23538315f5d8SStephen Hemminger 23548315f5d8SStephen Hemminger rcu_read_lock(); 23551218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 23568315f5d8SStephen Hemminger if (!tb) 23578315f5d8SStephen Hemminger return NULL; 23588315f5d8SStephen Hemminger 23598315f5d8SStephen Hemminger iter->main_trie = (struct trie *) tb->tb_data; 23608315f5d8SStephen Hemminger if (*pos == 0) 23618315f5d8SStephen Hemminger return SEQ_START_TOKEN; 23628315f5d8SStephen Hemminger else 23638315f5d8SStephen Hemminger return fib_route_get_idx(iter, *pos - 1); 23648315f5d8SStephen Hemminger } 23658315f5d8SStephen Hemminger 23668315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 23678315f5d8SStephen Hemminger { 23688315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 2369adaf9816SAlexander Duyck struct tnode *l = v; 23708315f5d8SStephen Hemminger 23718315f5d8SStephen Hemminger ++*pos; 23728315f5d8SStephen Hemminger if (v == SEQ_START_TOKEN) { 23738315f5d8SStephen Hemminger iter->pos = 0; 23748315f5d8SStephen Hemminger l = trie_firstleaf(iter->main_trie); 23758315f5d8SStephen Hemminger } else { 23768315f5d8SStephen Hemminger iter->pos++; 23778315f5d8SStephen Hemminger l = trie_nextleaf(l); 23788315f5d8SStephen Hemminger } 23798315f5d8SStephen Hemminger 23808315f5d8SStephen Hemminger if (l) 23818315f5d8SStephen Hemminger iter->key = l->key; 23828315f5d8SStephen Hemminger else 23838315f5d8SStephen Hemminger iter->pos = 0; 23848315f5d8SStephen Hemminger return l; 23858315f5d8SStephen Hemminger } 23868315f5d8SStephen Hemminger 23878315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 23888315f5d8SStephen Hemminger __releases(RCU) 23898315f5d8SStephen Hemminger { 23908315f5d8SStephen Hemminger rcu_read_unlock(); 23918315f5d8SStephen Hemminger } 23928315f5d8SStephen Hemminger 2393a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2394cb7b593cSStephen Hemminger { 2395a034ee3cSEric Dumazet unsigned int flags = 0; 2396cb7b593cSStephen Hemminger 2397a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2398a034ee3cSEric Dumazet flags = RTF_REJECT; 2399cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2400cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 240132ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2402cb7b593cSStephen Hemminger flags |= RTF_HOST; 2403cb7b593cSStephen Hemminger flags |= RTF_UP; 2404cb7b593cSStephen Hemminger return flags; 2405cb7b593cSStephen Hemminger } 2406cb7b593cSStephen Hemminger 2407cb7b593cSStephen Hemminger /* 2408cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2409cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2410cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2411cb7b593cSStephen Hemminger * legacy utilities 2412cb7b593cSStephen Hemminger */ 2413cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2414cb7b593cSStephen Hemminger { 2415adaf9816SAlexander Duyck struct tnode *l = v; 24161328042eSStephen Hemminger struct leaf_info *li; 2417cb7b593cSStephen Hemminger 2418cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2419cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2420cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2421cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2422cb7b593cSStephen Hemminger return 0; 2423cb7b593cSStephen Hemminger } 2424cb7b593cSStephen Hemminger 2425b67bfe0dSSasha Levin hlist_for_each_entry_rcu(li, &l->list, hlist) { 2426cb7b593cSStephen Hemminger struct fib_alias *fa; 242732ab5f80SAl Viro __be32 mask, prefix; 2428cb7b593cSStephen Hemminger 2429cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2430cb7b593cSStephen Hemminger prefix = htonl(l->key); 2431cb7b593cSStephen Hemminger 2432cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 24331371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2434a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2435cb7b593cSStephen Hemminger 2436cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2437cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2438cb7b593cSStephen Hemminger continue; 2439cb7b593cSStephen Hemminger 2440652586dfSTetsuo Handa seq_setwidth(seq, 127); 2441652586dfSTetsuo Handa 2442cb7b593cSStephen Hemminger if (fi) 24435e659e4cSPavel Emelyanov seq_printf(seq, 24445e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2445652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2446cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2447cb7b593cSStephen Hemminger prefix, 2448cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2449cb7b593cSStephen Hemminger fi->fib_priority, 2450cb7b593cSStephen Hemminger mask, 2451a07f5f50SStephen Hemminger (fi->fib_advmss ? 2452a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2453cb7b593cSStephen Hemminger fi->fib_window, 2454652586dfSTetsuo Handa fi->fib_rtt >> 3); 2455cb7b593cSStephen Hemminger else 24565e659e4cSPavel Emelyanov seq_printf(seq, 24575e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2458652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2459cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2460652586dfSTetsuo Handa mask, 0, 0, 0); 2461cb7b593cSStephen Hemminger 2462652586dfSTetsuo Handa seq_pad(seq, '\n'); 2463cb7b593cSStephen Hemminger } 2464cb7b593cSStephen Hemminger } 2465cb7b593cSStephen Hemminger 2466cb7b593cSStephen Hemminger return 0; 2467cb7b593cSStephen Hemminger } 2468cb7b593cSStephen Hemminger 2469f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 24708315f5d8SStephen Hemminger .start = fib_route_seq_start, 24718315f5d8SStephen Hemminger .next = fib_route_seq_next, 24728315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2473cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2474cb7b593cSStephen Hemminger }; 2475cb7b593cSStephen Hemminger 2476cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2477cb7b593cSStephen Hemminger { 24781c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 24798315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2480cb7b593cSStephen Hemminger } 2481cb7b593cSStephen Hemminger 24829a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2483cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2484cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2485cb7b593cSStephen Hemminger .read = seq_read, 2486cb7b593cSStephen Hemminger .llseek = seq_lseek, 24871c340b2fSDenis V. Lunev .release = seq_release_net, 2488cb7b593cSStephen Hemminger }; 2489cb7b593cSStephen Hemminger 249061a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 249119baf839SRobert Olsson { 2492d4beaa66SGao feng if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) 2493cb7b593cSStephen Hemminger goto out1; 2494cb7b593cSStephen Hemminger 2495d4beaa66SGao feng if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, 249661a02653SDenis V. Lunev &fib_triestat_fops)) 2497cb7b593cSStephen Hemminger goto out2; 2498cb7b593cSStephen Hemminger 2499d4beaa66SGao feng if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) 2500cb7b593cSStephen Hemminger goto out3; 2501cb7b593cSStephen Hemminger 250219baf839SRobert Olsson return 0; 2503cb7b593cSStephen Hemminger 2504cb7b593cSStephen Hemminger out3: 2505ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2506cb7b593cSStephen Hemminger out2: 2507ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2508cb7b593cSStephen Hemminger out1: 2509cb7b593cSStephen Hemminger return -ENOMEM; 251019baf839SRobert Olsson } 251119baf839SRobert Olsson 251261a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 251319baf839SRobert Olsson { 2514ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2515ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2516ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 251719baf839SRobert Olsson } 251819baf839SRobert Olsson 251919baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2520