119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1519baf839SRobert Olsson * This work is based on the LPC-trie which is originally descibed in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 1919baf839SRobert Olsson * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ 2619baf839SRobert Olsson * 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2919baf839SRobert Olsson * 3019baf839SRobert Olsson * 3119baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3219baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3319baf839SRobert Olsson * interface as the means of communication with the user level. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3619baf839SRobert Olsson * 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3919baf839SRobert Olsson * 4019baf839SRobert Olsson * This program is free software; you can redistribute it and/or 4119baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4219baf839SRobert Olsson * as published by the Free Software Foundation; either version 4319baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * Substantial contributions to this work comes from: 46fd966255SRobert Olsson * 47fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 48fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 49fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 50fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 5119baf839SRobert Olsson */ 5219baf839SRobert Olsson 5305eee48cSRobert Olsson #define VERSION "0.408" 5419baf839SRobert Olsson 5519baf839SRobert Olsson #include <asm/uaccess.h> 5619baf839SRobert Olsson #include <asm/system.h> 571977f032SJiri Slaby #include <linux/bitops.h> 5819baf839SRobert Olsson #include <linux/types.h> 5919baf839SRobert Olsson #include <linux/kernel.h> 6019baf839SRobert Olsson #include <linux/mm.h> 6119baf839SRobert Olsson #include <linux/string.h> 6219baf839SRobert Olsson #include <linux/socket.h> 6319baf839SRobert Olsson #include <linux/sockios.h> 6419baf839SRobert Olsson #include <linux/errno.h> 6519baf839SRobert Olsson #include <linux/in.h> 6619baf839SRobert Olsson #include <linux/inet.h> 67cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6819baf839SRobert Olsson #include <linux/netdevice.h> 6919baf839SRobert Olsson #include <linux/if_arp.h> 7019baf839SRobert Olsson #include <linux/proc_fs.h> 712373ce1cSRobert Olsson #include <linux/rcupdate.h> 7219baf839SRobert Olsson #include <linux/skbuff.h> 7319baf839SRobert Olsson #include <linux/netlink.h> 7419baf839SRobert Olsson #include <linux/init.h> 7519baf839SRobert Olsson #include <linux/list.h> 76457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7719baf839SRobert Olsson #include <net/ip.h> 7819baf839SRobert Olsson #include <net/protocol.h> 7919baf839SRobert Olsson #include <net/route.h> 8019baf839SRobert Olsson #include <net/tcp.h> 8119baf839SRobert Olsson #include <net/sock.h> 8219baf839SRobert Olsson #include <net/ip_fib.h> 8319baf839SRobert Olsson #include "fib_lookup.h" 8419baf839SRobert Olsson 8506ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8619baf839SRobert Olsson 8719baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8819baf839SRobert Olsson 8919baf839SRobert Olsson typedef unsigned int t_key; 9019baf839SRobert Olsson 9119baf839SRobert Olsson #define T_TNODE 0 9219baf839SRobert Olsson #define T_LEAF 1 9319baf839SRobert Olsson #define NODE_TYPE_MASK 0x1UL 942373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) 952373ce1cSRobert Olsson 9691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF)) 9791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF) 9819baf839SRobert Olsson 9919baf839SRobert Olsson struct node { 10091b9a277SOlof Johansson unsigned long parent; 1018d965444SEric Dumazet t_key key; 10219baf839SRobert Olsson }; 10319baf839SRobert Olsson 10419baf839SRobert Olsson struct leaf { 10591b9a277SOlof Johansson unsigned long parent; 1068d965444SEric Dumazet t_key key; 10719baf839SRobert Olsson struct hlist_head list; 1082373ce1cSRobert Olsson struct rcu_head rcu; 10919baf839SRobert Olsson }; 11019baf839SRobert Olsson 11119baf839SRobert Olsson struct leaf_info { 11219baf839SRobert Olsson struct hlist_node hlist; 1132373ce1cSRobert Olsson struct rcu_head rcu; 11419baf839SRobert Olsson int plen; 11519baf839SRobert Olsson struct list_head falh; 11619baf839SRobert Olsson }; 11719baf839SRobert Olsson 11819baf839SRobert Olsson struct tnode { 11991b9a277SOlof Johansson unsigned long parent; 1208d965444SEric Dumazet t_key key; 121112d8cfcSEric Dumazet unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 122112d8cfcSEric Dumazet unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 1238d965444SEric Dumazet unsigned int full_children; /* KEYLENGTH bits needed */ 1248d965444SEric Dumazet unsigned int empty_children; /* KEYLENGTH bits needed */ 1252373ce1cSRobert Olsson struct rcu_head rcu; 12619baf839SRobert Olsson struct node *child[0]; 12719baf839SRobert Olsson }; 12819baf839SRobert Olsson 12919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13019baf839SRobert Olsson struct trie_use_stats { 13119baf839SRobert Olsson unsigned int gets; 13219baf839SRobert Olsson unsigned int backtrack; 13319baf839SRobert Olsson unsigned int semantic_match_passed; 13419baf839SRobert Olsson unsigned int semantic_match_miss; 13519baf839SRobert Olsson unsigned int null_node_hit; 1362f36895aSRobert Olsson unsigned int resize_node_skipped; 13719baf839SRobert Olsson }; 13819baf839SRobert Olsson #endif 13919baf839SRobert Olsson 14019baf839SRobert Olsson struct trie_stat { 14119baf839SRobert Olsson unsigned int totdepth; 14219baf839SRobert Olsson unsigned int maxdepth; 14319baf839SRobert Olsson unsigned int tnodes; 14419baf839SRobert Olsson unsigned int leaves; 14519baf839SRobert Olsson unsigned int nullpointers; 14693672292SStephen Hemminger unsigned int prefixes; 14706ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 14819baf839SRobert Olsson }; 14919baf839SRobert Olsson 15019baf839SRobert Olsson struct trie { 15119baf839SRobert Olsson struct node *trie; 15219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 15319baf839SRobert Olsson struct trie_use_stats stats; 15419baf839SRobert Olsson #endif 15519baf839SRobert Olsson }; 15619baf839SRobert Olsson 15719baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 158a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 159a07f5f50SStephen Hemminger int wasfull); 16019baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn); 1612f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn); 1622f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn); 16319baf839SRobert Olsson static void tnode_free(struct tnode *tn); 16419baf839SRobert Olsson 165e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 166bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 16719baf839SRobert Olsson 16806801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node) 16906801916SStephen Hemminger { 170b59cfbf7SEric Dumazet return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 171b59cfbf7SEric Dumazet } 17206801916SStephen Hemminger 173b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node) 174b59cfbf7SEric Dumazet { 175b59cfbf7SEric Dumazet struct tnode *ret = node_parent(node); 176b59cfbf7SEric Dumazet 17706801916SStephen Hemminger return rcu_dereference(ret); 17806801916SStephen Hemminger } 17906801916SStephen Hemminger 18006801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr) 18106801916SStephen Hemminger { 18206801916SStephen Hemminger rcu_assign_pointer(node->parent, 18306801916SStephen Hemminger (unsigned long)ptr | NODE_TYPE(node)); 18406801916SStephen Hemminger } 1852373ce1cSRobert Olsson 186b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 18719baf839SRobert Olsson { 188b59cfbf7SEric Dumazet BUG_ON(i >= 1U << tn->bits); 18919baf839SRobert Olsson 190b59cfbf7SEric Dumazet return tn->child[i]; 191b59cfbf7SEric Dumazet } 192b59cfbf7SEric Dumazet 193b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 194b59cfbf7SEric Dumazet { 195b59cfbf7SEric Dumazet struct node *ret = tnode_get_child(tn, i); 196b59cfbf7SEric Dumazet 197b59cfbf7SEric Dumazet return rcu_dereference(ret); 19819baf839SRobert Olsson } 19919baf839SRobert Olsson 200bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn) 20119baf839SRobert Olsson { 20219baf839SRobert Olsson return 1 << tn->bits; 20319baf839SRobert Olsson } 20419baf839SRobert Olsson 205ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l) 206ab66b4a7SStephen Hemminger { 207ab66b4a7SStephen Hemminger return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 208ab66b4a7SStephen Hemminger } 209ab66b4a7SStephen Hemminger 21019baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 21119baf839SRobert Olsson { 21219baf839SRobert Olsson if (offset < KEYLENGTH) 21319baf839SRobert Olsson return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 21419baf839SRobert Olsson else 21519baf839SRobert Olsson return 0; 21619baf839SRobert Olsson } 21719baf839SRobert Olsson 21819baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b) 21919baf839SRobert Olsson { 22019baf839SRobert Olsson return a == b; 22119baf839SRobert Olsson } 22219baf839SRobert Olsson 22319baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 22419baf839SRobert Olsson { 22519baf839SRobert Olsson if (bits == 0 || offset >= KEYLENGTH) 22619baf839SRobert Olsson return 1; 22719baf839SRobert Olsson bits = bits > KEYLENGTH ? KEYLENGTH : bits; 22819baf839SRobert Olsson return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 22919baf839SRobert Olsson } 23019baf839SRobert Olsson 23119baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b) 23219baf839SRobert Olsson { 23319baf839SRobert Olsson t_key diff = a ^ b; 23419baf839SRobert Olsson int i = offset; 23519baf839SRobert Olsson 23619baf839SRobert Olsson if (!diff) 23719baf839SRobert Olsson return 0; 23819baf839SRobert Olsson while ((diff << i) >> (KEYLENGTH-1) == 0) 23919baf839SRobert Olsson i++; 24019baf839SRobert Olsson return i; 24119baf839SRobert Olsson } 24219baf839SRobert Olsson 24319baf839SRobert Olsson /* 24419baf839SRobert Olsson To understand this stuff, an understanding of keys and all their bits is 24519baf839SRobert Olsson necessary. Every node in the trie has a key associated with it, but not 24619baf839SRobert Olsson all of the bits in that key are significant. 24719baf839SRobert Olsson 24819baf839SRobert Olsson Consider a node 'n' and its parent 'tp'. 24919baf839SRobert Olsson 25019baf839SRobert Olsson If n is a leaf, every bit in its key is significant. Its presence is 251772cb712SRobert Olsson necessitated by path compression, since during a tree traversal (when 25219baf839SRobert Olsson searching for a leaf - unless we are doing an insertion) we will completely 25319baf839SRobert Olsson ignore all skipped bits we encounter. Thus we need to verify, at the end of 25419baf839SRobert Olsson a potentially successful search, that we have indeed been walking the 25519baf839SRobert Olsson correct key path. 25619baf839SRobert Olsson 25719baf839SRobert Olsson Note that we can never "miss" the correct key in the tree if present by 25819baf839SRobert Olsson following the wrong path. Path compression ensures that segments of the key 25919baf839SRobert Olsson that are the same for all keys with a given prefix are skipped, but the 26019baf839SRobert Olsson skipped part *is* identical for each node in the subtrie below the skipped 26119baf839SRobert Olsson bit! trie_insert() in this implementation takes care of that - note the 26219baf839SRobert Olsson call to tkey_sub_equals() in trie_insert(). 26319baf839SRobert Olsson 26419baf839SRobert Olsson if n is an internal node - a 'tnode' here, the various parts of its key 26519baf839SRobert Olsson have many different meanings. 26619baf839SRobert Olsson 26719baf839SRobert Olsson Example: 26819baf839SRobert Olsson _________________________________________________________________ 26919baf839SRobert Olsson | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 27019baf839SRobert Olsson ----------------------------------------------------------------- 27119baf839SRobert Olsson 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 27219baf839SRobert Olsson 27319baf839SRobert Olsson _________________________________________________________________ 27419baf839SRobert Olsson | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 27519baf839SRobert Olsson ----------------------------------------------------------------- 27619baf839SRobert Olsson 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 27719baf839SRobert Olsson 27819baf839SRobert Olsson tp->pos = 7 27919baf839SRobert Olsson tp->bits = 3 28019baf839SRobert Olsson n->pos = 15 28119baf839SRobert Olsson n->bits = 4 28219baf839SRobert Olsson 28319baf839SRobert Olsson First, let's just ignore the bits that come before the parent tp, that is 28419baf839SRobert Olsson the bits from 0 to (tp->pos-1). They are *known* but at this point we do 28519baf839SRobert Olsson not use them for anything. 28619baf839SRobert Olsson 28719baf839SRobert Olsson The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 28819baf839SRobert Olsson index into the parent's child array. That is, they will be used to find 28919baf839SRobert Olsson 'n' among tp's children. 29019baf839SRobert Olsson 29119baf839SRobert Olsson The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 29219baf839SRobert Olsson for the node n. 29319baf839SRobert Olsson 29419baf839SRobert Olsson All the bits we have seen so far are significant to the node n. The rest 29519baf839SRobert Olsson of the bits are really not needed or indeed known in n->key. 29619baf839SRobert Olsson 29719baf839SRobert Olsson The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 29819baf839SRobert Olsson n's child array, and will of course be different for each child. 29919baf839SRobert Olsson 300c877efb2SStephen Hemminger 30119baf839SRobert Olsson The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 30219baf839SRobert Olsson at this point. 30319baf839SRobert Olsson 30419baf839SRobert Olsson */ 30519baf839SRobert Olsson 3060c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn) 30719baf839SRobert Olsson { 3080c7770c7SStephen Hemminger WARN_ON(tn && tn->pos+tn->bits > 32); 30919baf839SRobert Olsson } 31019baf839SRobert Olsson 311f5026fabSDenis V. Lunev static const int halve_threshold = 25; 312f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 313f5026fabSDenis V. Lunev static const int halve_threshold_root = 8; 314f5026fabSDenis V. Lunev static const int inflate_threshold_root = 15; 31519baf839SRobert Olsson 3162373ce1cSRobert Olsson 3172373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3182373ce1cSRobert Olsson { 3192373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3202373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3212373ce1cSRobert Olsson } 3222373ce1cSRobert Olsson 3232373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3242373ce1cSRobert Olsson { 3252373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3262373ce1cSRobert Olsson } 3272373ce1cSRobert Olsson 3282373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head) 3292373ce1cSRobert Olsson { 330bc3c8c1eSStephen Hemminger struct leaf *l = container_of(head, struct leaf, rcu); 331bc3c8c1eSStephen Hemminger kmem_cache_free(trie_leaf_kmem, l); 3322373ce1cSRobert Olsson } 3332373ce1cSRobert Olsson 3342373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head) 3352373ce1cSRobert Olsson { 3362373ce1cSRobert Olsson kfree(container_of(head, struct leaf_info, rcu)); 3372373ce1cSRobert Olsson } 3382373ce1cSRobert Olsson 3392373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 3402373ce1cSRobert Olsson { 3412373ce1cSRobert Olsson call_rcu(&leaf->rcu, __leaf_info_free_rcu); 3422373ce1cSRobert Olsson } 3432373ce1cSRobert Olsson 3448d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size) 3452373ce1cSRobert Olsson { 3462373ce1cSRobert Olsson struct page *pages; 3472373ce1cSRobert Olsson 3482373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3498d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 3502373ce1cSRobert Olsson 3512373ce1cSRobert Olsson pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 3522373ce1cSRobert Olsson if (!pages) 3532373ce1cSRobert Olsson return NULL; 3542373ce1cSRobert Olsson 3552373ce1cSRobert Olsson return page_address(pages); 3562373ce1cSRobert Olsson } 3572373ce1cSRobert Olsson 3582373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head) 3592373ce1cSRobert Olsson { 3602373ce1cSRobert Olsson struct tnode *tn = container_of(head, struct tnode, rcu); 3618d965444SEric Dumazet size_t size = sizeof(struct tnode) + 3628d965444SEric Dumazet (sizeof(struct node *) << tn->bits); 3632373ce1cSRobert Olsson 3642373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3652373ce1cSRobert Olsson kfree(tn); 3662373ce1cSRobert Olsson else 3672373ce1cSRobert Olsson free_pages((unsigned long)tn, get_order(size)); 3682373ce1cSRobert Olsson } 3692373ce1cSRobert Olsson 3702373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn) 3712373ce1cSRobert Olsson { 372550e29bcSRobert Olsson if (IS_LEAF(tn)) { 373550e29bcSRobert Olsson struct leaf *l = (struct leaf *) tn; 374550e29bcSRobert Olsson call_rcu_bh(&l->rcu, __leaf_free_rcu); 375132adf54SStephen Hemminger } else 3762373ce1cSRobert Olsson call_rcu(&tn->rcu, __tnode_free_rcu); 3772373ce1cSRobert Olsson } 3782373ce1cSRobert Olsson 37919baf839SRobert Olsson static struct leaf *leaf_new(void) 38019baf839SRobert Olsson { 381bc3c8c1eSStephen Hemminger struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 38219baf839SRobert Olsson if (l) { 3832373ce1cSRobert Olsson l->parent = T_LEAF; 38419baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 38519baf839SRobert Olsson } 38619baf839SRobert Olsson return l; 38719baf839SRobert Olsson } 38819baf839SRobert Olsson 38919baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 39019baf839SRobert Olsson { 39119baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 3922373ce1cSRobert Olsson if (li) { 39319baf839SRobert Olsson li->plen = plen; 39419baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 3952373ce1cSRobert Olsson } 39619baf839SRobert Olsson return li; 39719baf839SRobert Olsson } 39819baf839SRobert Olsson 39919baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits) 40019baf839SRobert Olsson { 4018d965444SEric Dumazet size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 402f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 40319baf839SRobert Olsson 40419baf839SRobert Olsson if (tn) { 4052373ce1cSRobert Olsson tn->parent = T_TNODE; 40619baf839SRobert Olsson tn->pos = pos; 40719baf839SRobert Olsson tn->bits = bits; 40819baf839SRobert Olsson tn->key = key; 40919baf839SRobert Olsson tn->full_children = 0; 41019baf839SRobert Olsson tn->empty_children = 1<<bits; 41119baf839SRobert Olsson } 412c877efb2SStephen Hemminger 4138d965444SEric Dumazet pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 4148d965444SEric Dumazet (unsigned long) (sizeof(struct node) << bits)); 41519baf839SRobert Olsson return tn; 41619baf839SRobert Olsson } 41719baf839SRobert Olsson 41819baf839SRobert Olsson /* 41919baf839SRobert Olsson * Check whether a tnode 'n' is "full", i.e. it is an internal node 42019baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 42119baf839SRobert Olsson */ 42219baf839SRobert Olsson 423bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n) 42419baf839SRobert Olsson { 42519baf839SRobert Olsson if (n == NULL || IS_LEAF(n)) 42619baf839SRobert Olsson return 0; 42719baf839SRobert Olsson 42819baf839SRobert Olsson return ((struct tnode *) n)->pos == tn->pos + tn->bits; 42919baf839SRobert Olsson } 43019baf839SRobert Olsson 431a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i, 432a07f5f50SStephen Hemminger struct node *n) 43319baf839SRobert Olsson { 43419baf839SRobert Olsson tnode_put_child_reorg(tn, i, n, -1); 43519baf839SRobert Olsson } 43619baf839SRobert Olsson 43719baf839SRobert Olsson /* 43819baf839SRobert Olsson * Add a child at position i overwriting the old value. 43919baf839SRobert Olsson * Update the value of full_children and empty_children. 44019baf839SRobert Olsson */ 44119baf839SRobert Olsson 442a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 443a07f5f50SStephen Hemminger int wasfull) 44419baf839SRobert Olsson { 4452373ce1cSRobert Olsson struct node *chi = tn->child[i]; 44619baf839SRobert Olsson int isfull; 44719baf839SRobert Olsson 4480c7770c7SStephen Hemminger BUG_ON(i >= 1<<tn->bits); 4490c7770c7SStephen Hemminger 45019baf839SRobert Olsson 45119baf839SRobert Olsson /* update emptyChildren */ 45219baf839SRobert Olsson if (n == NULL && chi != NULL) 45319baf839SRobert Olsson tn->empty_children++; 45419baf839SRobert Olsson else if (n != NULL && chi == NULL) 45519baf839SRobert Olsson tn->empty_children--; 45619baf839SRobert Olsson 45719baf839SRobert Olsson /* update fullChildren */ 45819baf839SRobert Olsson if (wasfull == -1) 45919baf839SRobert Olsson wasfull = tnode_full(tn, chi); 46019baf839SRobert Olsson 46119baf839SRobert Olsson isfull = tnode_full(tn, n); 46219baf839SRobert Olsson if (wasfull && !isfull) 46319baf839SRobert Olsson tn->full_children--; 46419baf839SRobert Olsson else if (!wasfull && isfull) 46519baf839SRobert Olsson tn->full_children++; 46691b9a277SOlof Johansson 46719baf839SRobert Olsson if (n) 46806801916SStephen Hemminger node_set_parent(n, tn); 46919baf839SRobert Olsson 4702373ce1cSRobert Olsson rcu_assign_pointer(tn->child[i], n); 47119baf839SRobert Olsson } 47219baf839SRobert Olsson 47319baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn) 47419baf839SRobert Olsson { 47519baf839SRobert Olsson int i; 4762f36895aSRobert Olsson int err = 0; 4772f80b3c8SRobert Olsson struct tnode *old_tn; 478e6308be8SRobert Olsson int inflate_threshold_use; 479e6308be8SRobert Olsson int halve_threshold_use; 48005eee48cSRobert Olsson int max_resize; 48119baf839SRobert Olsson 48219baf839SRobert Olsson if (!tn) 48319baf839SRobert Olsson return NULL; 48419baf839SRobert Olsson 4850c7770c7SStephen Hemminger pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 48619baf839SRobert Olsson tn, inflate_threshold, halve_threshold); 48719baf839SRobert Olsson 48819baf839SRobert Olsson /* No children */ 48919baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn)) { 49019baf839SRobert Olsson tnode_free(tn); 49119baf839SRobert Olsson return NULL; 49219baf839SRobert Olsson } 49319baf839SRobert Olsson /* One child */ 49419baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 49519baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 49691b9a277SOlof Johansson struct node *n; 49719baf839SRobert Olsson 49891b9a277SOlof Johansson n = tn->child[i]; 4992373ce1cSRobert Olsson if (!n) 50091b9a277SOlof Johansson continue; 50119baf839SRobert Olsson 50219baf839SRobert Olsson /* compress one level */ 50306801916SStephen Hemminger node_set_parent(n, NULL); 50419baf839SRobert Olsson tnode_free(tn); 50519baf839SRobert Olsson return n; 50619baf839SRobert Olsson } 50719baf839SRobert Olsson /* 50819baf839SRobert Olsson * Double as long as the resulting node has a number of 50919baf839SRobert Olsson * nonempty nodes that are above the threshold. 51019baf839SRobert Olsson */ 51119baf839SRobert Olsson 51219baf839SRobert Olsson /* 51319baf839SRobert Olsson * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 51419baf839SRobert Olsson * the Helsinki University of Technology and Matti Tikkanen of Nokia 51519baf839SRobert Olsson * Telecommunications, page 6: 51619baf839SRobert Olsson * "A node is doubled if the ratio of non-empty children to all 51719baf839SRobert Olsson * children in the *doubled* node is at least 'high'." 51819baf839SRobert Olsson * 51919baf839SRobert Olsson * 'high' in this instance is the variable 'inflate_threshold'. It 52019baf839SRobert Olsson * is expressed as a percentage, so we multiply it with 52119baf839SRobert Olsson * tnode_child_length() and instead of multiplying by 2 (since the 52219baf839SRobert Olsson * child array will be doubled by inflate()) and multiplying 52319baf839SRobert Olsson * the left-hand side by 100 (to handle the percentage thing) we 52419baf839SRobert Olsson * multiply the left-hand side by 50. 52519baf839SRobert Olsson * 52619baf839SRobert Olsson * The left-hand side may look a bit weird: tnode_child_length(tn) 52719baf839SRobert Olsson * - tn->empty_children is of course the number of non-null children 52819baf839SRobert Olsson * in the current node. tn->full_children is the number of "full" 52919baf839SRobert Olsson * children, that is non-null tnodes with a skip value of 0. 53019baf839SRobert Olsson * All of those will be doubled in the resulting inflated tnode, so 53119baf839SRobert Olsson * we just count them one extra time here. 53219baf839SRobert Olsson * 53319baf839SRobert Olsson * A clearer way to write this would be: 53419baf839SRobert Olsson * 53519baf839SRobert Olsson * to_be_doubled = tn->full_children; 53619baf839SRobert Olsson * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 53719baf839SRobert Olsson * tn->full_children; 53819baf839SRobert Olsson * 53919baf839SRobert Olsson * new_child_length = tnode_child_length(tn) * 2; 54019baf839SRobert Olsson * 54119baf839SRobert Olsson * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 54219baf839SRobert Olsson * new_child_length; 54319baf839SRobert Olsson * if (new_fill_factor >= inflate_threshold) 54419baf839SRobert Olsson * 54519baf839SRobert Olsson * ...and so on, tho it would mess up the while () loop. 54619baf839SRobert Olsson * 54719baf839SRobert Olsson * anyway, 54819baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 54919baf839SRobert Olsson * inflate_threshold 55019baf839SRobert Olsson * 55119baf839SRobert Olsson * avoid a division: 55219baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 55319baf839SRobert Olsson * inflate_threshold * new_child_length 55419baf839SRobert Olsson * 55519baf839SRobert Olsson * expand not_to_be_doubled and to_be_doubled, and shorten: 55619baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 55719baf839SRobert Olsson * tn->full_children) >= inflate_threshold * new_child_length 55819baf839SRobert Olsson * 55919baf839SRobert Olsson * expand new_child_length: 56019baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 56119baf839SRobert Olsson * tn->full_children) >= 56219baf839SRobert Olsson * inflate_threshold * tnode_child_length(tn) * 2 56319baf839SRobert Olsson * 56419baf839SRobert Olsson * shorten again: 56519baf839SRobert Olsson * 50 * (tn->full_children + tnode_child_length(tn) - 56619baf839SRobert Olsson * tn->empty_children) >= inflate_threshold * 56719baf839SRobert Olsson * tnode_child_length(tn) 56819baf839SRobert Olsson * 56919baf839SRobert Olsson */ 57019baf839SRobert Olsson 57119baf839SRobert Olsson check_tnode(tn); 57219baf839SRobert Olsson 573e6308be8SRobert Olsson /* Keep root node larger */ 574e6308be8SRobert Olsson 575e6308be8SRobert Olsson if (!tn->parent) 576e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold_root; 577e6308be8SRobert Olsson else 578e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold; 579e6308be8SRobert Olsson 5802f36895aSRobert Olsson err = 0; 58105eee48cSRobert Olsson max_resize = 10; 58205eee48cSRobert Olsson while ((tn->full_children > 0 && max_resize-- && 583a07f5f50SStephen Hemminger 50 * (tn->full_children + tnode_child_length(tn) 584a07f5f50SStephen Hemminger - tn->empty_children) 585a07f5f50SStephen Hemminger >= inflate_threshold_use * tnode_child_length(tn))) { 58619baf839SRobert Olsson 5872f80b3c8SRobert Olsson old_tn = tn; 5882f80b3c8SRobert Olsson tn = inflate(t, tn); 589a07f5f50SStephen Hemminger 5902f80b3c8SRobert Olsson if (IS_ERR(tn)) { 5912f80b3c8SRobert Olsson tn = old_tn; 5922f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 5932f36895aSRobert Olsson t->stats.resize_node_skipped++; 5942f36895aSRobert Olsson #endif 5952f36895aSRobert Olsson break; 5962f36895aSRobert Olsson } 59719baf839SRobert Olsson } 59819baf839SRobert Olsson 59905eee48cSRobert Olsson if (max_resize < 0) { 60005eee48cSRobert Olsson if (!tn->parent) 601a07f5f50SStephen Hemminger pr_warning("Fix inflate_threshold_root." 602a07f5f50SStephen Hemminger " Now=%d size=%d bits\n", 60305eee48cSRobert Olsson inflate_threshold_root, tn->bits); 60405eee48cSRobert Olsson else 605a07f5f50SStephen Hemminger pr_warning("Fix inflate_threshold." 606a07f5f50SStephen Hemminger " Now=%d size=%d bits\n", 60705eee48cSRobert Olsson inflate_threshold, tn->bits); 60805eee48cSRobert Olsson } 60905eee48cSRobert Olsson 61019baf839SRobert Olsson check_tnode(tn); 61119baf839SRobert Olsson 61219baf839SRobert Olsson /* 61319baf839SRobert Olsson * Halve as long as the number of empty children in this 61419baf839SRobert Olsson * node is above threshold. 61519baf839SRobert Olsson */ 6162f36895aSRobert Olsson 617e6308be8SRobert Olsson 618e6308be8SRobert Olsson /* Keep root node larger */ 619e6308be8SRobert Olsson 620e6308be8SRobert Olsson if (!tn->parent) 621e6308be8SRobert Olsson halve_threshold_use = halve_threshold_root; 622e6308be8SRobert Olsson else 623e6308be8SRobert Olsson halve_threshold_use = halve_threshold; 624e6308be8SRobert Olsson 6252f36895aSRobert Olsson err = 0; 62605eee48cSRobert Olsson max_resize = 10; 62705eee48cSRobert Olsson while (tn->bits > 1 && max_resize-- && 62819baf839SRobert Olsson 100 * (tnode_child_length(tn) - tn->empty_children) < 629e6308be8SRobert Olsson halve_threshold_use * tnode_child_length(tn)) { 63019baf839SRobert Olsson 6312f80b3c8SRobert Olsson old_tn = tn; 6322f80b3c8SRobert Olsson tn = halve(t, tn); 6332f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6342f80b3c8SRobert Olsson tn = old_tn; 6352f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6362f36895aSRobert Olsson t->stats.resize_node_skipped++; 6372f36895aSRobert Olsson #endif 6382f36895aSRobert Olsson break; 6392f36895aSRobert Olsson } 6402f36895aSRobert Olsson } 6412f36895aSRobert Olsson 64205eee48cSRobert Olsson if (max_resize < 0) { 64305eee48cSRobert Olsson if (!tn->parent) 644a07f5f50SStephen Hemminger pr_warning("Fix halve_threshold_root." 645a07f5f50SStephen Hemminger " Now=%d size=%d bits\n", 64605eee48cSRobert Olsson halve_threshold_root, tn->bits); 64705eee48cSRobert Olsson else 648a07f5f50SStephen Hemminger pr_warning("Fix halve_threshold." 649a07f5f50SStephen Hemminger " Now=%d size=%d bits\n", 65005eee48cSRobert Olsson halve_threshold, tn->bits); 65105eee48cSRobert Olsson } 65219baf839SRobert Olsson 65319baf839SRobert Olsson /* Only one child remains */ 65419baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 65519baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 65691b9a277SOlof Johansson struct node *n; 65719baf839SRobert Olsson 65891b9a277SOlof Johansson n = tn->child[i]; 6592373ce1cSRobert Olsson if (!n) 66091b9a277SOlof Johansson continue; 66191b9a277SOlof Johansson 66291b9a277SOlof Johansson /* compress one level */ 66391b9a277SOlof Johansson 66406801916SStephen Hemminger node_set_parent(n, NULL); 66519baf839SRobert Olsson tnode_free(tn); 66619baf839SRobert Olsson return n; 66719baf839SRobert Olsson } 66819baf839SRobert Olsson 66919baf839SRobert Olsson return (struct node *) tn; 67019baf839SRobert Olsson } 67119baf839SRobert Olsson 6722f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn) 67319baf839SRobert Olsson { 67419baf839SRobert Olsson struct tnode *oldtnode = tn; 67519baf839SRobert Olsson int olen = tnode_child_length(tn); 67619baf839SRobert Olsson int i; 67719baf839SRobert Olsson 6780c7770c7SStephen Hemminger pr_debug("In inflate\n"); 67919baf839SRobert Olsson 68019baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 68119baf839SRobert Olsson 6822f80b3c8SRobert Olsson if (!tn) 6832f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 6842f36895aSRobert Olsson 6852f36895aSRobert Olsson /* 6862f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 6872f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 6882f36895aSRobert Olsson * fails. In case of failure we return the oldnode and inflate 6892f36895aSRobert Olsson * of tnode is ignored. 6902f36895aSRobert Olsson */ 6912f36895aSRobert Olsson 6922f36895aSRobert Olsson for (i = 0; i < olen; i++) { 693a07f5f50SStephen Hemminger struct tnode *inode; 6942f36895aSRobert Olsson 695a07f5f50SStephen Hemminger inode = (struct tnode *) tnode_get_child(oldtnode, i); 6962f36895aSRobert Olsson if (inode && 6972f36895aSRobert Olsson IS_TNODE(inode) && 6982f36895aSRobert Olsson inode->pos == oldtnode->pos + oldtnode->bits && 6992f36895aSRobert Olsson inode->bits > 1) { 7002f36895aSRobert Olsson struct tnode *left, *right; 701ab66b4a7SStephen Hemminger t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; 7022f36895aSRobert Olsson 7032f36895aSRobert Olsson left = tnode_new(inode->key&(~m), inode->pos + 1, 7042f36895aSRobert Olsson inode->bits - 1); 7052f80b3c8SRobert Olsson if (!left) 7062f80b3c8SRobert Olsson goto nomem; 7072f36895aSRobert Olsson 7082f36895aSRobert Olsson right = tnode_new(inode->key|m, inode->pos + 1, 7092f36895aSRobert Olsson inode->bits - 1); 7102f36895aSRobert Olsson 7112f36895aSRobert Olsson if (!right) { 7122f80b3c8SRobert Olsson tnode_free(left); 7132f80b3c8SRobert Olsson goto nomem; 7142f36895aSRobert Olsson } 7152f36895aSRobert Olsson 7162f36895aSRobert Olsson put_child(t, tn, 2*i, (struct node *) left); 7172f36895aSRobert Olsson put_child(t, tn, 2*i+1, (struct node *) right); 7182f36895aSRobert Olsson } 7192f36895aSRobert Olsson } 7202f36895aSRobert Olsson 72119baf839SRobert Olsson for (i = 0; i < olen; i++) { 722c95aaf9aSStephen Hemminger struct tnode *inode; 72319baf839SRobert Olsson struct node *node = tnode_get_child(oldtnode, i); 72491b9a277SOlof Johansson struct tnode *left, *right; 72591b9a277SOlof Johansson int size, j; 72619baf839SRobert Olsson 72719baf839SRobert Olsson /* An empty child */ 72819baf839SRobert Olsson if (node == NULL) 72919baf839SRobert Olsson continue; 73019baf839SRobert Olsson 73119baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 73219baf839SRobert Olsson 73319baf839SRobert Olsson if (IS_LEAF(node) || ((struct tnode *) node)->pos > 73419baf839SRobert Olsson tn->pos + tn->bits - 1) { 735a07f5f50SStephen Hemminger if (tkey_extract_bits(node->key, 736a07f5f50SStephen Hemminger oldtnode->pos + oldtnode->bits, 73719baf839SRobert Olsson 1) == 0) 73819baf839SRobert Olsson put_child(t, tn, 2*i, node); 73919baf839SRobert Olsson else 74019baf839SRobert Olsson put_child(t, tn, 2*i+1, node); 74119baf839SRobert Olsson continue; 74219baf839SRobert Olsson } 74319baf839SRobert Olsson 74419baf839SRobert Olsson /* An internal node with two children */ 74519baf839SRobert Olsson inode = (struct tnode *) node; 74619baf839SRobert Olsson 74719baf839SRobert Olsson if (inode->bits == 1) { 74819baf839SRobert Olsson put_child(t, tn, 2*i, inode->child[0]); 74919baf839SRobert Olsson put_child(t, tn, 2*i+1, inode->child[1]); 75019baf839SRobert Olsson 75119baf839SRobert Olsson tnode_free(inode); 75291b9a277SOlof Johansson continue; 75319baf839SRobert Olsson } 75419baf839SRobert Olsson 75519baf839SRobert Olsson /* An internal node with more than two children */ 75619baf839SRobert Olsson 75719baf839SRobert Olsson /* We will replace this node 'inode' with two new 75819baf839SRobert Olsson * ones, 'left' and 'right', each with half of the 75919baf839SRobert Olsson * original children. The two new nodes will have 76019baf839SRobert Olsson * a position one bit further down the key and this 76119baf839SRobert Olsson * means that the "significant" part of their keys 76219baf839SRobert Olsson * (see the discussion near the top of this file) 76319baf839SRobert Olsson * will differ by one bit, which will be "0" in 76419baf839SRobert Olsson * left's key and "1" in right's key. Since we are 76519baf839SRobert Olsson * moving the key position by one step, the bit that 76619baf839SRobert Olsson * we are moving away from - the bit at position 76719baf839SRobert Olsson * (inode->pos) - is the one that will differ between 76819baf839SRobert Olsson * left and right. So... we synthesize that bit in the 76919baf839SRobert Olsson * two new keys. 77019baf839SRobert Olsson * The mask 'm' below will be a single "one" bit at 77119baf839SRobert Olsson * the position (inode->pos) 77219baf839SRobert Olsson */ 77319baf839SRobert Olsson 77419baf839SRobert Olsson /* Use the old key, but set the new significant 77519baf839SRobert Olsson * bit to zero. 77619baf839SRobert Olsson */ 7772f36895aSRobert Olsson 7782f36895aSRobert Olsson left = (struct tnode *) tnode_get_child(tn, 2*i); 7792f36895aSRobert Olsson put_child(t, tn, 2*i, NULL); 78019baf839SRobert Olsson 78191b9a277SOlof Johansson BUG_ON(!left); 78219baf839SRobert Olsson 7832f36895aSRobert Olsson right = (struct tnode *) tnode_get_child(tn, 2*i+1); 7842f36895aSRobert Olsson put_child(t, tn, 2*i+1, NULL); 78519baf839SRobert Olsson 78691b9a277SOlof Johansson BUG_ON(!right); 78719baf839SRobert Olsson 78819baf839SRobert Olsson size = tnode_child_length(left); 78919baf839SRobert Olsson for (j = 0; j < size; j++) { 79019baf839SRobert Olsson put_child(t, left, j, inode->child[j]); 79119baf839SRobert Olsson put_child(t, right, j, inode->child[j + size]); 79219baf839SRobert Olsson } 79319baf839SRobert Olsson put_child(t, tn, 2*i, resize(t, left)); 79419baf839SRobert Olsson put_child(t, tn, 2*i+1, resize(t, right)); 79519baf839SRobert Olsson 79619baf839SRobert Olsson tnode_free(inode); 79719baf839SRobert Olsson } 79819baf839SRobert Olsson tnode_free(oldtnode); 79919baf839SRobert Olsson return tn; 8002f80b3c8SRobert Olsson nomem: 8012f80b3c8SRobert Olsson { 8022f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8032f80b3c8SRobert Olsson int j; 8042f80b3c8SRobert Olsson 8052f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8062f80b3c8SRobert Olsson if (tn->child[j]) 8072f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8082f80b3c8SRobert Olsson 8092f80b3c8SRobert Olsson tnode_free(tn); 8102f80b3c8SRobert Olsson 8112f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8122f80b3c8SRobert Olsson } 81319baf839SRobert Olsson } 81419baf839SRobert Olsson 8152f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn) 81619baf839SRobert Olsson { 81719baf839SRobert Olsson struct tnode *oldtnode = tn; 81819baf839SRobert Olsson struct node *left, *right; 81919baf839SRobert Olsson int i; 82019baf839SRobert Olsson int olen = tnode_child_length(tn); 82119baf839SRobert Olsson 8220c7770c7SStephen Hemminger pr_debug("In halve\n"); 82319baf839SRobert Olsson 82419baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 82519baf839SRobert Olsson 8262f80b3c8SRobert Olsson if (!tn) 8272f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8282f36895aSRobert Olsson 8292f36895aSRobert Olsson /* 8302f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 8312f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 8322f36895aSRobert Olsson * fails. In case of failure we return the oldnode and halve 8332f36895aSRobert Olsson * of tnode is ignored. 8342f36895aSRobert Olsson */ 8352f36895aSRobert Olsson 8362f36895aSRobert Olsson for (i = 0; i < olen; i += 2) { 8372f36895aSRobert Olsson left = tnode_get_child(oldtnode, i); 8382f36895aSRobert Olsson right = tnode_get_child(oldtnode, i+1); 8392f36895aSRobert Olsson 8402f36895aSRobert Olsson /* Two nonempty children */ 8412f36895aSRobert Olsson if (left && right) { 8422f80b3c8SRobert Olsson struct tnode *newn; 8432f36895aSRobert Olsson 8442f80b3c8SRobert Olsson newn = tnode_new(left->key, tn->pos + tn->bits, 1); 8452f80b3c8SRobert Olsson 8462f80b3c8SRobert Olsson if (!newn) 8472f80b3c8SRobert Olsson goto nomem; 8482f80b3c8SRobert Olsson 8492f80b3c8SRobert Olsson put_child(t, tn, i/2, (struct node *)newn); 8502f36895aSRobert Olsson } 8512f36895aSRobert Olsson 8522f36895aSRobert Olsson } 85319baf839SRobert Olsson 85419baf839SRobert Olsson for (i = 0; i < olen; i += 2) { 85591b9a277SOlof Johansson struct tnode *newBinNode; 85691b9a277SOlof Johansson 85719baf839SRobert Olsson left = tnode_get_child(oldtnode, i); 85819baf839SRobert Olsson right = tnode_get_child(oldtnode, i+1); 85919baf839SRobert Olsson 86019baf839SRobert Olsson /* At least one of the children is empty */ 86119baf839SRobert Olsson if (left == NULL) { 86219baf839SRobert Olsson if (right == NULL) /* Both are empty */ 86319baf839SRobert Olsson continue; 86419baf839SRobert Olsson put_child(t, tn, i/2, right); 86591b9a277SOlof Johansson continue; 86691b9a277SOlof Johansson } 86791b9a277SOlof Johansson 86891b9a277SOlof Johansson if (right == NULL) { 86919baf839SRobert Olsson put_child(t, tn, i/2, left); 87091b9a277SOlof Johansson continue; 87191b9a277SOlof Johansson } 87219baf839SRobert Olsson 87319baf839SRobert Olsson /* Two nonempty children */ 87491b9a277SOlof Johansson newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 8752f36895aSRobert Olsson put_child(t, tn, i/2, NULL); 87619baf839SRobert Olsson put_child(t, newBinNode, 0, left); 87719baf839SRobert Olsson put_child(t, newBinNode, 1, right); 87819baf839SRobert Olsson put_child(t, tn, i/2, resize(t, newBinNode)); 87919baf839SRobert Olsson } 88019baf839SRobert Olsson tnode_free(oldtnode); 88119baf839SRobert Olsson return tn; 8822f80b3c8SRobert Olsson nomem: 8832f80b3c8SRobert Olsson { 8842f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8852f80b3c8SRobert Olsson int j; 8862f80b3c8SRobert Olsson 8872f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8882f80b3c8SRobert Olsson if (tn->child[j]) 8892f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8902f80b3c8SRobert Olsson 8912f80b3c8SRobert Olsson tnode_free(tn); 8922f80b3c8SRobert Olsson 8932f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8942f80b3c8SRobert Olsson } 89519baf839SRobert Olsson } 89619baf839SRobert Olsson 897772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 8982373ce1cSRobert Olsson via get_fa_head and dump */ 8992373ce1cSRobert Olsson 900772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen) 90119baf839SRobert Olsson { 902772cb712SRobert Olsson struct hlist_head *head = &l->list; 90319baf839SRobert Olsson struct hlist_node *node; 90419baf839SRobert Olsson struct leaf_info *li; 90519baf839SRobert Olsson 9062373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, head, hlist) 90719baf839SRobert Olsson if (li->plen == plen) 90819baf839SRobert Olsson return li; 90991b9a277SOlof Johansson 91019baf839SRobert Olsson return NULL; 91119baf839SRobert Olsson } 91219baf839SRobert Olsson 91319baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen) 91419baf839SRobert Olsson { 915772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 91619baf839SRobert Olsson 91791b9a277SOlof Johansson if (!li) 91891b9a277SOlof Johansson return NULL; 91919baf839SRobert Olsson 92091b9a277SOlof Johansson return &li->falh; 92119baf839SRobert Olsson } 92219baf839SRobert Olsson 92319baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 92419baf839SRobert Olsson { 92519baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 92691b9a277SOlof Johansson struct hlist_node *node; 92719baf839SRobert Olsson 92891b9a277SOlof Johansson if (hlist_empty(head)) { 9292373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 93091b9a277SOlof Johansson } else { 93191b9a277SOlof Johansson hlist_for_each_entry(li, node, head, hlist) { 93219baf839SRobert Olsson if (new->plen > li->plen) 93319baf839SRobert Olsson break; 93419baf839SRobert Olsson 93519baf839SRobert Olsson last = li; 93619baf839SRobert Olsson } 93719baf839SRobert Olsson if (last) 9382373ce1cSRobert Olsson hlist_add_after_rcu(&last->hlist, &new->hlist); 93919baf839SRobert Olsson else 9402373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 94119baf839SRobert Olsson } 94219baf839SRobert Olsson } 94319baf839SRobert Olsson 9442373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 9452373ce1cSRobert Olsson 94619baf839SRobert Olsson static struct leaf * 94719baf839SRobert Olsson fib_find_node(struct trie *t, u32 key) 94819baf839SRobert Olsson { 94919baf839SRobert Olsson int pos; 95019baf839SRobert Olsson struct tnode *tn; 95119baf839SRobert Olsson struct node *n; 95219baf839SRobert Olsson 95319baf839SRobert Olsson pos = 0; 9542373ce1cSRobert Olsson n = rcu_dereference(t->trie); 95519baf839SRobert Olsson 95619baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 95719baf839SRobert Olsson tn = (struct tnode *) n; 95819baf839SRobert Olsson 95919baf839SRobert Olsson check_tnode(tn); 96019baf839SRobert Olsson 96119baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 96219baf839SRobert Olsson pos = tn->pos + tn->bits; 963a07f5f50SStephen Hemminger n = tnode_get_child_rcu(tn, 964a07f5f50SStephen Hemminger tkey_extract_bits(key, 965a07f5f50SStephen Hemminger tn->pos, 966a07f5f50SStephen Hemminger tn->bits)); 96791b9a277SOlof Johansson } else 96819baf839SRobert Olsson break; 96919baf839SRobert Olsson } 97019baf839SRobert Olsson /* Case we have found a leaf. Compare prefixes */ 97119baf839SRobert Olsson 97291b9a277SOlof Johansson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) 97391b9a277SOlof Johansson return (struct leaf *)n; 97491b9a277SOlof Johansson 97519baf839SRobert Olsson return NULL; 97619baf839SRobert Olsson } 97719baf839SRobert Olsson 97819baf839SRobert Olsson static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 97919baf839SRobert Olsson { 98019baf839SRobert Olsson int wasfull; 98106801916SStephen Hemminger t_key cindex, key = tn->key; 98206801916SStephen Hemminger struct tnode *tp; 98319baf839SRobert Olsson 98406801916SStephen Hemminger while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 98519baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 98619baf839SRobert Olsson wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 98719baf839SRobert Olsson tn = (struct tnode *) resize(t, (struct tnode *)tn); 988a07f5f50SStephen Hemminger 989a07f5f50SStephen Hemminger tnode_put_child_reorg((struct tnode *)tp, cindex, 990a07f5f50SStephen Hemminger (struct node *)tn, wasfull); 99119baf839SRobert Olsson 99206801916SStephen Hemminger tp = node_parent((struct node *) tn); 99306801916SStephen Hemminger if (!tp) 99419baf839SRobert Olsson break; 99506801916SStephen Hemminger tn = tp; 99619baf839SRobert Olsson } 99706801916SStephen Hemminger 99819baf839SRobert Olsson /* Handle last (top) tnode */ 99919baf839SRobert Olsson if (IS_TNODE(tn)) 100019baf839SRobert Olsson tn = (struct tnode *)resize(t, (struct tnode *)tn); 100119baf839SRobert Olsson 100219baf839SRobert Olsson return (struct node *)tn; 100319baf839SRobert Olsson } 100419baf839SRobert Olsson 10052373ce1cSRobert Olsson /* only used from updater-side */ 10062373ce1cSRobert Olsson 1007fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 100819baf839SRobert Olsson { 100919baf839SRobert Olsson int pos, newpos; 101019baf839SRobert Olsson struct tnode *tp = NULL, *tn = NULL; 101119baf839SRobert Olsson struct node *n; 101219baf839SRobert Olsson struct leaf *l; 101319baf839SRobert Olsson int missbit; 101419baf839SRobert Olsson struct list_head *fa_head = NULL; 101519baf839SRobert Olsson struct leaf_info *li; 101619baf839SRobert Olsson t_key cindex; 101719baf839SRobert Olsson 101819baf839SRobert Olsson pos = 0; 101919baf839SRobert Olsson n = t->trie; 102019baf839SRobert Olsson 102119baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 102219baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 102319baf839SRobert Olsson * and we should just put our new leaf in that. 102419baf839SRobert Olsson * If we point to a T_TNODE, check if it matches our key. Note that 102519baf839SRobert Olsson * a T_TNODE might be skipping any number of bits - its 'pos' need 102619baf839SRobert Olsson * not be the parent's 'pos'+'bits'! 102719baf839SRobert Olsson * 102819baf839SRobert Olsson * If it does match the current key, get pos/bits from it, extract 102919baf839SRobert Olsson * the index from our key, push the T_TNODE and walk the tree. 103019baf839SRobert Olsson * 103119baf839SRobert Olsson * If it doesn't, we have to replace it with a new T_TNODE. 103219baf839SRobert Olsson * 103319baf839SRobert Olsson * If we point to a T_LEAF, it might or might not have the same key 103419baf839SRobert Olsson * as we do. If it does, just change the value, update the T_LEAF's 103519baf839SRobert Olsson * value, and return it. 103619baf839SRobert Olsson * If it doesn't, we need to replace it with a T_TNODE. 103719baf839SRobert Olsson */ 103819baf839SRobert Olsson 103919baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 104019baf839SRobert Olsson tn = (struct tnode *) n; 104119baf839SRobert Olsson 104219baf839SRobert Olsson check_tnode(tn); 104319baf839SRobert Olsson 104419baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 104519baf839SRobert Olsson tp = tn; 104619baf839SRobert Olsson pos = tn->pos + tn->bits; 1047a07f5f50SStephen Hemminger n = tnode_get_child(tn, 1048a07f5f50SStephen Hemminger tkey_extract_bits(key, 1049a07f5f50SStephen Hemminger tn->pos, 1050a07f5f50SStephen Hemminger tn->bits)); 105119baf839SRobert Olsson 105206801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 105391b9a277SOlof Johansson } else 105419baf839SRobert Olsson break; 105519baf839SRobert Olsson } 105619baf839SRobert Olsson 105719baf839SRobert Olsson /* 105819baf839SRobert Olsson * n ----> NULL, LEAF or TNODE 105919baf839SRobert Olsson * 106019baf839SRobert Olsson * tp is n's (parent) ----> NULL or TNODE 106119baf839SRobert Olsson */ 106219baf839SRobert Olsson 106391b9a277SOlof Johansson BUG_ON(tp && IS_LEAF(tp)); 106419baf839SRobert Olsson 106519baf839SRobert Olsson /* Case 1: n is a leaf. Compare prefixes */ 106619baf839SRobert Olsson 106719baf839SRobert Olsson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1068c95aaf9aSStephen Hemminger l = (struct leaf *) n; 106919baf839SRobert Olsson li = leaf_info_new(plen); 107019baf839SRobert Olsson 1071fea86ad8SStephen Hemminger if (!li) 1072fea86ad8SStephen Hemminger return NULL; 107319baf839SRobert Olsson 107419baf839SRobert Olsson fa_head = &li->falh; 107519baf839SRobert Olsson insert_leaf_info(&l->list, li); 107619baf839SRobert Olsson goto done; 107719baf839SRobert Olsson } 107819baf839SRobert Olsson l = leaf_new(); 107919baf839SRobert Olsson 1080fea86ad8SStephen Hemminger if (!l) 1081fea86ad8SStephen Hemminger return NULL; 108219baf839SRobert Olsson 108319baf839SRobert Olsson l->key = key; 108419baf839SRobert Olsson li = leaf_info_new(plen); 108519baf839SRobert Olsson 1086f835e471SRobert Olsson if (!li) { 1087f835e471SRobert Olsson tnode_free((struct tnode *) l); 1088fea86ad8SStephen Hemminger return NULL; 1089f835e471SRobert Olsson } 109019baf839SRobert Olsson 109119baf839SRobert Olsson fa_head = &li->falh; 109219baf839SRobert Olsson insert_leaf_info(&l->list, li); 109319baf839SRobert Olsson 109419baf839SRobert Olsson if (t->trie && n == NULL) { 109591b9a277SOlof Johansson /* Case 2: n is NULL, and will just insert a new leaf */ 109619baf839SRobert Olsson 109706801916SStephen Hemminger node_set_parent((struct node *)l, tp); 109819baf839SRobert Olsson 109919baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 110019baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 110191b9a277SOlof Johansson } else { 110219baf839SRobert Olsson /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 110319baf839SRobert Olsson /* 110419baf839SRobert Olsson * Add a new tnode here 110519baf839SRobert Olsson * first tnode need some special handling 110619baf839SRobert Olsson */ 110719baf839SRobert Olsson 110819baf839SRobert Olsson if (tp) 110919baf839SRobert Olsson pos = tp->pos+tp->bits; 111019baf839SRobert Olsson else 111119baf839SRobert Olsson pos = 0; 111291b9a277SOlof Johansson 111319baf839SRobert Olsson if (n) { 111419baf839SRobert Olsson newpos = tkey_mismatch(key, pos, n->key); 111519baf839SRobert Olsson tn = tnode_new(n->key, newpos, 1); 111691b9a277SOlof Johansson } else { 111719baf839SRobert Olsson newpos = 0; 111819baf839SRobert Olsson tn = tnode_new(key, newpos, 1); /* First tnode */ 111919baf839SRobert Olsson } 1120f835e471SRobert Olsson 1121f835e471SRobert Olsson if (!tn) { 1122f835e471SRobert Olsson free_leaf_info(li); 1123f835e471SRobert Olsson tnode_free((struct tnode *) l); 1124fea86ad8SStephen Hemminger return NULL; 1125f835e471SRobert Olsson } 112619baf839SRobert Olsson 112706801916SStephen Hemminger node_set_parent((struct node *)tn, tp); 112819baf839SRobert Olsson 112919baf839SRobert Olsson missbit = tkey_extract_bits(key, newpos, 1); 113019baf839SRobert Olsson put_child(t, tn, missbit, (struct node *)l); 113119baf839SRobert Olsson put_child(t, tn, 1-missbit, n); 113219baf839SRobert Olsson 113319baf839SRobert Olsson if (tp) { 113419baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1135a07f5f50SStephen Hemminger put_child(t, (struct tnode *)tp, cindex, 1136a07f5f50SStephen Hemminger (struct node *)tn); 113791b9a277SOlof Johansson } else { 1138a07f5f50SStephen Hemminger rcu_assign_pointer(t->trie, (struct node *)tn); 113919baf839SRobert Olsson tp = tn; 114019baf839SRobert Olsson } 114119baf839SRobert Olsson } 114291b9a277SOlof Johansson 114391b9a277SOlof Johansson if (tp && tp->pos + tp->bits > 32) 1144a07f5f50SStephen Hemminger pr_warning("fib_trie" 1145a07f5f50SStephen Hemminger " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 114619baf839SRobert Olsson tp, tp->pos, tp->bits, key, plen); 114791b9a277SOlof Johansson 114819baf839SRobert Olsson /* Rebalance the trie */ 11492373ce1cSRobert Olsson 11502373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1151f835e471SRobert Olsson done: 115219baf839SRobert Olsson return fa_head; 115319baf839SRobert Olsson } 115419baf839SRobert Olsson 1155d562f1f8SRobert Olsson /* 1156d562f1f8SRobert Olsson * Caller must hold RTNL. 1157d562f1f8SRobert Olsson */ 11584e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) 115919baf839SRobert Olsson { 116019baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 116119baf839SRobert Olsson struct fib_alias *fa, *new_fa; 116219baf839SRobert Olsson struct list_head *fa_head = NULL; 116319baf839SRobert Olsson struct fib_info *fi; 11644e902c57SThomas Graf int plen = cfg->fc_dst_len; 11654e902c57SThomas Graf u8 tos = cfg->fc_tos; 116619baf839SRobert Olsson u32 key, mask; 116719baf839SRobert Olsson int err; 116819baf839SRobert Olsson struct leaf *l; 116919baf839SRobert Olsson 117019baf839SRobert Olsson if (plen > 32) 117119baf839SRobert Olsson return -EINVAL; 117219baf839SRobert Olsson 11734e902c57SThomas Graf key = ntohl(cfg->fc_dst); 117419baf839SRobert Olsson 11752dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 117619baf839SRobert Olsson 117719baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 117819baf839SRobert Olsson 117919baf839SRobert Olsson if (key & ~mask) 118019baf839SRobert Olsson return -EINVAL; 118119baf839SRobert Olsson 118219baf839SRobert Olsson key = key & mask; 118319baf839SRobert Olsson 11844e902c57SThomas Graf fi = fib_create_info(cfg); 11854e902c57SThomas Graf if (IS_ERR(fi)) { 11864e902c57SThomas Graf err = PTR_ERR(fi); 118719baf839SRobert Olsson goto err; 11884e902c57SThomas Graf } 118919baf839SRobert Olsson 119019baf839SRobert Olsson l = fib_find_node(t, key); 119119baf839SRobert Olsson fa = NULL; 119219baf839SRobert Olsson 119319baf839SRobert Olsson if (l) { 119419baf839SRobert Olsson fa_head = get_fa_head(l, plen); 119519baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 119619baf839SRobert Olsson } 119719baf839SRobert Olsson 119819baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 119919baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 120019baf839SRobert Olsson * exists or to the node before which we will insert new one. 120119baf839SRobert Olsson * 120219baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 120319baf839SRobert Olsson * insert to the head of f. 120419baf839SRobert Olsson * 120519baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 120619baf839SRobert Olsson * and we need to allocate a new one of those as well. 120719baf839SRobert Olsson */ 120819baf839SRobert Olsson 120991b9a277SOlof Johansson if (fa && fa->fa_info->fib_priority == fi->fib_priority) { 121019baf839SRobert Olsson struct fib_alias *fa_orig; 121119baf839SRobert Olsson 121219baf839SRobert Olsson err = -EEXIST; 12134e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 121419baf839SRobert Olsson goto out; 121519baf839SRobert Olsson 12164e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 121719baf839SRobert Olsson struct fib_info *fi_drop; 121819baf839SRobert Olsson u8 state; 121919baf839SRobert Olsson 12206725033fSJoonwoo Park if (fi->fib_treeref > 1) 12216725033fSJoonwoo Park goto out; 12226725033fSJoonwoo Park 12232373ce1cSRobert Olsson err = -ENOBUFS; 1224e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 12252373ce1cSRobert Olsson if (new_fa == NULL) 12262373ce1cSRobert Olsson goto out; 122719baf839SRobert Olsson 122819baf839SRobert Olsson fi_drop = fa->fa_info; 12292373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12302373ce1cSRobert Olsson new_fa->fa_info = fi; 12314e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12324e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 123319baf839SRobert Olsson state = fa->fa_state; 12342373ce1cSRobert Olsson new_fa->fa_state &= ~FA_S_ACCESSED; 123519baf839SRobert Olsson 12362373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12372373ce1cSRobert Olsson alias_free_mem_rcu(fa); 123819baf839SRobert Olsson 123919baf839SRobert Olsson fib_release_info(fi_drop); 124019baf839SRobert Olsson if (state & FA_S_ACCESSED) 124119baf839SRobert Olsson rt_cache_flush(-1); 1242b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1243b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 124419baf839SRobert Olsson 124519baf839SRobert Olsson goto succeeded; 124619baf839SRobert Olsson } 124719baf839SRobert Olsson /* Error if we find a perfect match which 124819baf839SRobert Olsson * uses the same scope, type, and nexthop 124919baf839SRobert Olsson * information. 125019baf839SRobert Olsson */ 125119baf839SRobert Olsson fa_orig = fa; 125219baf839SRobert Olsson list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 125319baf839SRobert Olsson if (fa->fa_tos != tos) 125419baf839SRobert Olsson break; 125519baf839SRobert Olsson if (fa->fa_info->fib_priority != fi->fib_priority) 125619baf839SRobert Olsson break; 12574e902c57SThomas Graf if (fa->fa_type == cfg->fc_type && 12584e902c57SThomas Graf fa->fa_scope == cfg->fc_scope && 1259a07f5f50SStephen Hemminger fa->fa_info == fi) 126019baf839SRobert Olsson goto out; 126119baf839SRobert Olsson } 1262a07f5f50SStephen Hemminger 12634e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 126419baf839SRobert Olsson fa = fa_orig; 126519baf839SRobert Olsson } 126619baf839SRobert Olsson err = -ENOENT; 12674e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 126819baf839SRobert Olsson goto out; 126919baf839SRobert Olsson 127019baf839SRobert Olsson err = -ENOBUFS; 1271e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 127219baf839SRobert Olsson if (new_fa == NULL) 127319baf839SRobert Olsson goto out; 127419baf839SRobert Olsson 127519baf839SRobert Olsson new_fa->fa_info = fi; 127619baf839SRobert Olsson new_fa->fa_tos = tos; 12774e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12784e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 127919baf839SRobert Olsson new_fa->fa_state = 0; 128019baf839SRobert Olsson /* 128119baf839SRobert Olsson * Insert new entry to the list. 128219baf839SRobert Olsson */ 128319baf839SRobert Olsson 1284f835e471SRobert Olsson if (!fa_head) { 1285fea86ad8SStephen Hemminger fa_head = fib_insert_node(t, key, plen); 1286fea86ad8SStephen Hemminger if (unlikely(!fa_head)) { 1287fea86ad8SStephen Hemminger err = -ENOMEM; 1288f835e471SRobert Olsson goto out_free_new_fa; 1289f835e471SRobert Olsson } 1290fea86ad8SStephen Hemminger } 129119baf839SRobert Olsson 12922373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 12932373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 129419baf839SRobert Olsson 129519baf839SRobert Olsson rt_cache_flush(-1); 12964e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1297b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 129819baf839SRobert Olsson succeeded: 129919baf839SRobert Olsson return 0; 1300f835e471SRobert Olsson 1301f835e471SRobert Olsson out_free_new_fa: 1302f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 130319baf839SRobert Olsson out: 130419baf839SRobert Olsson fib_release_info(fi); 130591b9a277SOlof Johansson err: 130619baf839SRobert Olsson return err; 130719baf839SRobert Olsson } 130819baf839SRobert Olsson 13092373ce1cSRobert Olsson 1310772cb712SRobert Olsson /* should be called with rcu_read_lock */ 1311a07f5f50SStephen Hemminger static int check_leaf(struct trie *t, struct leaf *l, 1312a07f5f50SStephen Hemminger t_key key, const struct flowi *flp, 131306c74270SPatrick McHardy struct fib_result *res) 131419baf839SRobert Olsson { 131519baf839SRobert Olsson struct leaf_info *li; 131619baf839SRobert Olsson struct hlist_head *hhead = &l->list; 131719baf839SRobert Olsson struct hlist_node *node; 131819baf839SRobert Olsson 13192373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1320a07f5f50SStephen Hemminger int err; 1321a07f5f50SStephen Hemminger int plen = li->plen; 1322a07f5f50SStephen Hemminger __be32 mask = inet_make_mask(plen); 1323a07f5f50SStephen Hemminger 1324888454c5SAl Viro if (l->key != (key & ntohl(mask))) 132519baf839SRobert Olsson continue; 132619baf839SRobert Olsson 1327a07f5f50SStephen Hemminger err = fib_semantic_match(&li->falh, flp, res, 1328a07f5f50SStephen Hemminger htonl(l->key), mask, plen); 1329a07f5f50SStephen Hemminger 133019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1331a07f5f50SStephen Hemminger if (err <= 0) 133219baf839SRobert Olsson t->stats.semantic_match_passed++; 1333a07f5f50SStephen Hemminger else 133419baf839SRobert Olsson t->stats.semantic_match_miss++; 133519baf839SRobert Olsson #endif 1336a07f5f50SStephen Hemminger if (err <= 0) 1337a07f5f50SStephen Hemminger return plen; 133819baf839SRobert Olsson } 133919baf839SRobert Olsson 1340a07f5f50SStephen Hemminger return -1; 1341a07f5f50SStephen Hemminger } 1342a07f5f50SStephen Hemminger 1343a07f5f50SStephen Hemminger static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, 1344a07f5f50SStephen Hemminger struct fib_result *res) 134519baf839SRobert Olsson { 134619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 134719baf839SRobert Olsson int plen, ret = 0; 134819baf839SRobert Olsson struct node *n; 134919baf839SRobert Olsson struct tnode *pn; 135019baf839SRobert Olsson int pos, bits; 135119baf839SRobert Olsson t_key key = ntohl(flp->fl4_dst); 135219baf839SRobert Olsson int chopped_off; 135319baf839SRobert Olsson t_key cindex = 0; 135419baf839SRobert Olsson int current_prefix_length = KEYLENGTH; 135591b9a277SOlof Johansson struct tnode *cn; 135691b9a277SOlof Johansson t_key node_prefix, key_prefix, pref_mismatch; 135791b9a277SOlof Johansson int mp; 135891b9a277SOlof Johansson 13592373ce1cSRobert Olsson rcu_read_lock(); 136019baf839SRobert Olsson 13612373ce1cSRobert Olsson n = rcu_dereference(t->trie); 136219baf839SRobert Olsson if (!n) 136319baf839SRobert Olsson goto failed; 136419baf839SRobert Olsson 136519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 136619baf839SRobert Olsson t->stats.gets++; 136719baf839SRobert Olsson #endif 136819baf839SRobert Olsson 136919baf839SRobert Olsson /* Just a leaf? */ 137019baf839SRobert Olsson if (IS_LEAF(n)) { 1371a07f5f50SStephen Hemminger plen = check_leaf(t, (struct leaf *)n, key, flp, res); 1372a07f5f50SStephen Hemminger if (plen < 0) 137319baf839SRobert Olsson goto failed; 1374a07f5f50SStephen Hemminger ret = 0; 1375a07f5f50SStephen Hemminger goto found; 137619baf839SRobert Olsson } 1377a07f5f50SStephen Hemminger 137819baf839SRobert Olsson pn = (struct tnode *) n; 137919baf839SRobert Olsson chopped_off = 0; 138019baf839SRobert Olsson 138119baf839SRobert Olsson while (pn) { 138219baf839SRobert Olsson pos = pn->pos; 138319baf839SRobert Olsson bits = pn->bits; 138419baf839SRobert Olsson 138519baf839SRobert Olsson if (!chopped_off) 1386ab66b4a7SStephen Hemminger cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1387ab66b4a7SStephen Hemminger pos, bits); 138819baf839SRobert Olsson 138919baf839SRobert Olsson n = tnode_get_child(pn, cindex); 139019baf839SRobert Olsson 139119baf839SRobert Olsson if (n == NULL) { 139219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 139319baf839SRobert Olsson t->stats.null_node_hit++; 139419baf839SRobert Olsson #endif 139519baf839SRobert Olsson goto backtrace; 139619baf839SRobert Olsson } 139719baf839SRobert Olsson 139891b9a277SOlof Johansson if (IS_LEAF(n)) { 1399a07f5f50SStephen Hemminger plen = check_leaf(t, (struct leaf *)n, key, flp, res); 1400a07f5f50SStephen Hemminger if (plen < 0) 140191b9a277SOlof Johansson goto backtrace; 1402a07f5f50SStephen Hemminger 1403a07f5f50SStephen Hemminger ret = 0; 1404a07f5f50SStephen Hemminger goto found; 140591b9a277SOlof Johansson } 140691b9a277SOlof Johansson 140791b9a277SOlof Johansson cn = (struct tnode *)n; 140819baf839SRobert Olsson 140919baf839SRobert Olsson /* 141019baf839SRobert Olsson * It's a tnode, and we can do some extra checks here if we 141119baf839SRobert Olsson * like, to avoid descending into a dead-end branch. 141219baf839SRobert Olsson * This tnode is in the parent's child array at index 141319baf839SRobert Olsson * key[p_pos..p_pos+p_bits] but potentially with some bits 141419baf839SRobert Olsson * chopped off, so in reality the index may be just a 141519baf839SRobert Olsson * subprefix, padded with zero at the end. 141619baf839SRobert Olsson * We can also take a look at any skipped bits in this 141719baf839SRobert Olsson * tnode - everything up to p_pos is supposed to be ok, 141819baf839SRobert Olsson * and the non-chopped bits of the index (se previous 141919baf839SRobert Olsson * paragraph) are also guaranteed ok, but the rest is 142019baf839SRobert Olsson * considered unknown. 142119baf839SRobert Olsson * 142219baf839SRobert Olsson * The skipped bits are key[pos+bits..cn->pos]. 142319baf839SRobert Olsson */ 142419baf839SRobert Olsson 142519baf839SRobert Olsson /* If current_prefix_length < pos+bits, we are already doing 142619baf839SRobert Olsson * actual prefix matching, which means everything from 142719baf839SRobert Olsson * pos+(bits-chopped_off) onward must be zero along some 142819baf839SRobert Olsson * branch of this subtree - otherwise there is *no* valid 142919baf839SRobert Olsson * prefix present. Here we can only check the skipped 143019baf839SRobert Olsson * bits. Remember, since we have already indexed into the 143119baf839SRobert Olsson * parent's child array, we know that the bits we chopped of 143219baf839SRobert Olsson * *are* zero. 143319baf839SRobert Olsson */ 143419baf839SRobert Olsson 1435a07f5f50SStephen Hemminger /* NOTA BENE: Checking only skipped bits 1436a07f5f50SStephen Hemminger for the new node here */ 143719baf839SRobert Olsson 143819baf839SRobert Olsson if (current_prefix_length < pos+bits) { 143919baf839SRobert Olsson if (tkey_extract_bits(cn->key, current_prefix_length, 1440a07f5f50SStephen Hemminger cn->pos - current_prefix_length) 1441a07f5f50SStephen Hemminger || !(cn->child[0])) 144219baf839SRobert Olsson goto backtrace; 144319baf839SRobert Olsson } 144419baf839SRobert Olsson 144519baf839SRobert Olsson /* 144619baf839SRobert Olsson * If chopped_off=0, the index is fully validated and we 144719baf839SRobert Olsson * only need to look at the skipped bits for this, the new, 144819baf839SRobert Olsson * tnode. What we actually want to do is to find out if 144919baf839SRobert Olsson * these skipped bits match our key perfectly, or if we will 145019baf839SRobert Olsson * have to count on finding a matching prefix further down, 145119baf839SRobert Olsson * because if we do, we would like to have some way of 145219baf839SRobert Olsson * verifying the existence of such a prefix at this point. 145319baf839SRobert Olsson */ 145419baf839SRobert Olsson 145519baf839SRobert Olsson /* The only thing we can do at this point is to verify that 145619baf839SRobert Olsson * any such matching prefix can indeed be a prefix to our 145719baf839SRobert Olsson * key, and if the bits in the node we are inspecting that 145819baf839SRobert Olsson * do not match our key are not ZERO, this cannot be true. 145919baf839SRobert Olsson * Thus, find out where there is a mismatch (before cn->pos) 146019baf839SRobert Olsson * and verify that all the mismatching bits are zero in the 146119baf839SRobert Olsson * new tnode's key. 146219baf839SRobert Olsson */ 146319baf839SRobert Olsson 1464a07f5f50SStephen Hemminger /* 1465a07f5f50SStephen Hemminger * Note: We aren't very concerned about the piece of 1466a07f5f50SStephen Hemminger * the key that precede pn->pos+pn->bits, since these 1467a07f5f50SStephen Hemminger * have already been checked. The bits after cn->pos 1468a07f5f50SStephen Hemminger * aren't checked since these are by definition 1469a07f5f50SStephen Hemminger * "unknown" at this point. Thus, what we want to see 1470a07f5f50SStephen Hemminger * is if we are about to enter the "prefix matching" 1471a07f5f50SStephen Hemminger * state, and in that case verify that the skipped 1472a07f5f50SStephen Hemminger * bits that will prevail throughout this subtree are 1473a07f5f50SStephen Hemminger * zero, as they have to be if we are to find a 1474a07f5f50SStephen Hemminger * matching prefix. 147519baf839SRobert Olsson */ 147619baf839SRobert Olsson 1477ab66b4a7SStephen Hemminger node_prefix = mask_pfx(cn->key, cn->pos); 1478ab66b4a7SStephen Hemminger key_prefix = mask_pfx(key, cn->pos); 147919baf839SRobert Olsson pref_mismatch = key_prefix^node_prefix; 148019baf839SRobert Olsson mp = 0; 148119baf839SRobert Olsson 1482a07f5f50SStephen Hemminger /* 1483a07f5f50SStephen Hemminger * In short: If skipped bits in this node do not match 1484a07f5f50SStephen Hemminger * the search key, enter the "prefix matching" 1485a07f5f50SStephen Hemminger * state.directly. 148619baf839SRobert Olsson */ 148719baf839SRobert Olsson if (pref_mismatch) { 148819baf839SRobert Olsson while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 148919baf839SRobert Olsson mp++; 149019baf839SRobert Olsson pref_mismatch = pref_mismatch << 1; 149119baf839SRobert Olsson } 149219baf839SRobert Olsson key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 149319baf839SRobert Olsson 149419baf839SRobert Olsson if (key_prefix != 0) 149519baf839SRobert Olsson goto backtrace; 149619baf839SRobert Olsson 149719baf839SRobert Olsson if (current_prefix_length >= cn->pos) 149819baf839SRobert Olsson current_prefix_length = mp; 149919baf839SRobert Olsson } 1500a07f5f50SStephen Hemminger 150119baf839SRobert Olsson pn = (struct tnode *)n; /* Descend */ 150219baf839SRobert Olsson chopped_off = 0; 150319baf839SRobert Olsson continue; 150491b9a277SOlof Johansson 150519baf839SRobert Olsson backtrace: 150619baf839SRobert Olsson chopped_off++; 150719baf839SRobert Olsson 150819baf839SRobert Olsson /* As zero don't change the child key (cindex) */ 1509a07f5f50SStephen Hemminger while ((chopped_off <= pn->bits) 1510a07f5f50SStephen Hemminger && !(cindex & (1<<(chopped_off-1)))) 151119baf839SRobert Olsson chopped_off++; 151219baf839SRobert Olsson 151319baf839SRobert Olsson /* Decrease current_... with bits chopped off */ 151419baf839SRobert Olsson if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1515a07f5f50SStephen Hemminger current_prefix_length = pn->pos + pn->bits 1516a07f5f50SStephen Hemminger - chopped_off; 151719baf839SRobert Olsson 151819baf839SRobert Olsson /* 151919baf839SRobert Olsson * Either we do the actual chop off according or if we have 152019baf839SRobert Olsson * chopped off all bits in this tnode walk up to our parent. 152119baf839SRobert Olsson */ 152219baf839SRobert Olsson 152391b9a277SOlof Johansson if (chopped_off <= pn->bits) { 152419baf839SRobert Olsson cindex &= ~(1 << (chopped_off-1)); 152591b9a277SOlof Johansson } else { 152606801916SStephen Hemminger struct tnode *parent = node_parent((struct node *) pn); 152706801916SStephen Hemminger if (!parent) 152819baf839SRobert Olsson goto failed; 152919baf839SRobert Olsson 153019baf839SRobert Olsson /* Get Child's index */ 153106801916SStephen Hemminger cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); 153206801916SStephen Hemminger pn = parent; 153319baf839SRobert Olsson chopped_off = 0; 153419baf839SRobert Olsson 153519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 153619baf839SRobert Olsson t->stats.backtrack++; 153719baf839SRobert Olsson #endif 153819baf839SRobert Olsson goto backtrace; 153919baf839SRobert Olsson } 154019baf839SRobert Olsson } 154119baf839SRobert Olsson failed: 154219baf839SRobert Olsson ret = 1; 154319baf839SRobert Olsson found: 15442373ce1cSRobert Olsson rcu_read_unlock(); 154519baf839SRobert Olsson return ret; 154619baf839SRobert Olsson } 154719baf839SRobert Olsson 15482373ce1cSRobert Olsson /* only called from updater side */ 154919baf839SRobert Olsson static int trie_leaf_remove(struct trie *t, t_key key) 155019baf839SRobert Olsson { 155119baf839SRobert Olsson t_key cindex; 155219baf839SRobert Olsson struct tnode *tp = NULL; 155319baf839SRobert Olsson struct node *n = t->trie; 155419baf839SRobert Olsson struct leaf *l; 155519baf839SRobert Olsson 15560c7770c7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", n); 155719baf839SRobert Olsson 155819baf839SRobert Olsson /* Note that in the case skipped bits, those bits are *not* checked! 155919baf839SRobert Olsson * When we finish this, we will have NULL or a T_LEAF, and the 156019baf839SRobert Olsson * T_LEAF may or may not match our key. 156119baf839SRobert Olsson */ 156219baf839SRobert Olsson 156319baf839SRobert Olsson while (n != NULL && IS_TNODE(n)) { 156419baf839SRobert Olsson struct tnode *tn = (struct tnode *) n; 156519baf839SRobert Olsson check_tnode(tn); 1566a07f5f50SStephen Hemminger n = tnode_get_child(tn, tkey_extract_bits(key, 1567a07f5f50SStephen Hemminger tn->pos, tn->bits)); 156819baf839SRobert Olsson 156906801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 157019baf839SRobert Olsson } 157119baf839SRobert Olsson l = (struct leaf *) n; 157219baf839SRobert Olsson 157319baf839SRobert Olsson if (!n || !tkey_equals(l->key, key)) 157419baf839SRobert Olsson return 0; 157519baf839SRobert Olsson 157619baf839SRobert Olsson /* 157719baf839SRobert Olsson * Key found. 157819baf839SRobert Olsson * Remove the leaf and rebalance the tree 157919baf839SRobert Olsson */ 158006801916SStephen Hemminger tp = node_parent(n); 158119baf839SRobert Olsson tnode_free((struct tnode *) n); 158219baf839SRobert Olsson 158319baf839SRobert Olsson if (tp) { 158419baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 158519baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, NULL); 15862373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 158791b9a277SOlof Johansson } else 15882373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 158919baf839SRobert Olsson 159019baf839SRobert Olsson return 1; 159119baf839SRobert Olsson } 159219baf839SRobert Olsson 1593d562f1f8SRobert Olsson /* 1594d562f1f8SRobert Olsson * Caller must hold RTNL. 1595d562f1f8SRobert Olsson */ 15964e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) 159719baf839SRobert Olsson { 159819baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 159919baf839SRobert Olsson u32 key, mask; 16004e902c57SThomas Graf int plen = cfg->fc_dst_len; 16014e902c57SThomas Graf u8 tos = cfg->fc_tos; 160219baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 160319baf839SRobert Olsson struct list_head *fa_head; 160419baf839SRobert Olsson struct leaf *l; 160591b9a277SOlof Johansson struct leaf_info *li; 160691b9a277SOlof Johansson 160719baf839SRobert Olsson if (plen > 32) 160819baf839SRobert Olsson return -EINVAL; 160919baf839SRobert Olsson 16104e902c57SThomas Graf key = ntohl(cfg->fc_dst); 161119baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 161219baf839SRobert Olsson 161319baf839SRobert Olsson if (key & ~mask) 161419baf839SRobert Olsson return -EINVAL; 161519baf839SRobert Olsson 161619baf839SRobert Olsson key = key & mask; 161719baf839SRobert Olsson l = fib_find_node(t, key); 161819baf839SRobert Olsson 161919baf839SRobert Olsson if (!l) 162019baf839SRobert Olsson return -ESRCH; 162119baf839SRobert Olsson 162219baf839SRobert Olsson fa_head = get_fa_head(l, plen); 162319baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 162419baf839SRobert Olsson 162519baf839SRobert Olsson if (!fa) 162619baf839SRobert Olsson return -ESRCH; 162719baf839SRobert Olsson 16280c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 162919baf839SRobert Olsson 163019baf839SRobert Olsson fa_to_delete = NULL; 163119baf839SRobert Olsson fa_head = fa->fa_list.prev; 16322373ce1cSRobert Olsson 163319baf839SRobert Olsson list_for_each_entry(fa, fa_head, fa_list) { 163419baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 163519baf839SRobert Olsson 163619baf839SRobert Olsson if (fa->fa_tos != tos) 163719baf839SRobert Olsson break; 163819baf839SRobert Olsson 16394e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 16404e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 16414e902c57SThomas Graf fa->fa_scope == cfg->fc_scope) && 16424e902c57SThomas Graf (!cfg->fc_protocol || 16434e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 16444e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 164519baf839SRobert Olsson fa_to_delete = fa; 164619baf839SRobert Olsson break; 164719baf839SRobert Olsson } 164819baf839SRobert Olsson } 164919baf839SRobert Olsson 165091b9a277SOlof Johansson if (!fa_to_delete) 165191b9a277SOlof Johansson return -ESRCH; 165219baf839SRobert Olsson 165319baf839SRobert Olsson fa = fa_to_delete; 16544e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1655b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 165619baf839SRobert Olsson 165719baf839SRobert Olsson l = fib_find_node(t, key); 1658772cb712SRobert Olsson li = find_leaf_info(l, plen); 165919baf839SRobert Olsson 16602373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 166119baf839SRobert Olsson 166219baf839SRobert Olsson if (list_empty(fa_head)) { 16632373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 166419baf839SRobert Olsson free_leaf_info(li); 16652373ce1cSRobert Olsson } 166619baf839SRobert Olsson 166719baf839SRobert Olsson if (hlist_empty(&l->list)) 166819baf839SRobert Olsson trie_leaf_remove(t, key); 166919baf839SRobert Olsson 167019baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 167119baf839SRobert Olsson rt_cache_flush(-1); 167219baf839SRobert Olsson 16732373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16742373ce1cSRobert Olsson alias_free_mem_rcu(fa); 167519baf839SRobert Olsson return 0; 167619baf839SRobert Olsson } 167719baf839SRobert Olsson 167819baf839SRobert Olsson static int trie_flush_list(struct trie *t, struct list_head *head) 167919baf839SRobert Olsson { 168019baf839SRobert Olsson struct fib_alias *fa, *fa_node; 168119baf839SRobert Olsson int found = 0; 168219baf839SRobert Olsson 168319baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 168419baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 168519baf839SRobert Olsson 168619baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 16872373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 16882373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16892373ce1cSRobert Olsson alias_free_mem_rcu(fa); 169019baf839SRobert Olsson found++; 169119baf839SRobert Olsson } 169219baf839SRobert Olsson } 169319baf839SRobert Olsson return found; 169419baf839SRobert Olsson } 169519baf839SRobert Olsson 169619baf839SRobert Olsson static int trie_flush_leaf(struct trie *t, struct leaf *l) 169719baf839SRobert Olsson { 169819baf839SRobert Olsson int found = 0; 169919baf839SRobert Olsson struct hlist_head *lih = &l->list; 170019baf839SRobert Olsson struct hlist_node *node, *tmp; 170119baf839SRobert Olsson struct leaf_info *li = NULL; 170219baf839SRobert Olsson 170319baf839SRobert Olsson hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 170419baf839SRobert Olsson found += trie_flush_list(t, &li->falh); 170519baf839SRobert Olsson 170619baf839SRobert Olsson if (list_empty(&li->falh)) { 17072373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 170819baf839SRobert Olsson free_leaf_info(li); 170919baf839SRobert Olsson } 171019baf839SRobert Olsson } 171119baf839SRobert Olsson return found; 171219baf839SRobert Olsson } 171319baf839SRobert Olsson 17142373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 17152373ce1cSRobert Olsson 171619baf839SRobert Olsson static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 171719baf839SRobert Olsson { 171819baf839SRobert Olsson struct node *c = (struct node *) thisleaf; 171919baf839SRobert Olsson struct tnode *p; 172019baf839SRobert Olsson int idx; 17212373ce1cSRobert Olsson struct node *trie = rcu_dereference(t->trie); 172219baf839SRobert Olsson 172319baf839SRobert Olsson if (c == NULL) { 17242373ce1cSRobert Olsson if (trie == NULL) 172519baf839SRobert Olsson return NULL; 172619baf839SRobert Olsson 17272373ce1cSRobert Olsson if (IS_LEAF(trie)) /* trie w. just a leaf */ 17282373ce1cSRobert Olsson return (struct leaf *) trie; 172919baf839SRobert Olsson 17302373ce1cSRobert Olsson p = (struct tnode *)trie; /* Start */ 173191b9a277SOlof Johansson } else 1732b59cfbf7SEric Dumazet p = node_parent_rcu(c); 1733c877efb2SStephen Hemminger 173419baf839SRobert Olsson while (p) { 173519baf839SRobert Olsson int pos, last; 173619baf839SRobert Olsson 173719baf839SRobert Olsson /* Find the next child of the parent */ 173819baf839SRobert Olsson if (c) 173919baf839SRobert Olsson pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 174019baf839SRobert Olsson else 174119baf839SRobert Olsson pos = 0; 174219baf839SRobert Olsson 174319baf839SRobert Olsson last = 1 << p->bits; 174419baf839SRobert Olsson for (idx = pos; idx < last ; idx++) { 17452373ce1cSRobert Olsson c = rcu_dereference(p->child[idx]); 17462373ce1cSRobert Olsson 17472373ce1cSRobert Olsson if (!c) 174891b9a277SOlof Johansson continue; 174919baf839SRobert Olsson 175019baf839SRobert Olsson /* Decend if tnode */ 17512373ce1cSRobert Olsson while (IS_TNODE(c)) { 17522373ce1cSRobert Olsson p = (struct tnode *) c; 175319baf839SRobert Olsson idx = 0; 175419baf839SRobert Olsson 175519baf839SRobert Olsson /* Rightmost non-NULL branch */ 175619baf839SRobert Olsson if (p && IS_TNODE(p)) 17572373ce1cSRobert Olsson while (!(c = rcu_dereference(p->child[idx])) 17582373ce1cSRobert Olsson && idx < (1<<p->bits)) idx++; 175919baf839SRobert Olsson 176019baf839SRobert Olsson /* Done with this tnode? */ 17612373ce1cSRobert Olsson if (idx >= (1 << p->bits) || !c) 176219baf839SRobert Olsson goto up; 176319baf839SRobert Olsson } 17642373ce1cSRobert Olsson return (struct leaf *) c; 176519baf839SRobert Olsson } 176619baf839SRobert Olsson up: 176719baf839SRobert Olsson /* No more children go up one step */ 176819baf839SRobert Olsson c = (struct node *) p; 1769b59cfbf7SEric Dumazet p = node_parent_rcu(c); 177019baf839SRobert Olsson } 177119baf839SRobert Olsson return NULL; /* Ready. Root of trie */ 177219baf839SRobert Olsson } 177319baf839SRobert Olsson 1774d562f1f8SRobert Olsson /* 1775d562f1f8SRobert Olsson * Caller must hold RTNL. 1776d562f1f8SRobert Olsson */ 177719baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb) 177819baf839SRobert Olsson { 177919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 178019baf839SRobert Olsson struct leaf *ll = NULL, *l = NULL; 178119baf839SRobert Olsson int found = 0, h; 178219baf839SRobert Olsson 178319baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 178419baf839SRobert Olsson found += trie_flush_leaf(t, l); 178519baf839SRobert Olsson 178619baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 178719baf839SRobert Olsson trie_leaf_remove(t, ll->key); 178819baf839SRobert Olsson ll = l; 178919baf839SRobert Olsson } 179019baf839SRobert Olsson 179119baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 179219baf839SRobert Olsson trie_leaf_remove(t, ll->key); 179319baf839SRobert Olsson 17940c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 179519baf839SRobert Olsson return found; 179619baf839SRobert Olsson } 179719baf839SRobert Olsson 1798a07f5f50SStephen Hemminger static void fn_trie_select_default(struct fib_table *tb, 1799a07f5f50SStephen Hemminger const struct flowi *flp, 1800a07f5f50SStephen Hemminger struct fib_result *res) 180119baf839SRobert Olsson { 180219baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 180319baf839SRobert Olsson int order, last_idx; 180419baf839SRobert Olsson struct fib_info *fi = NULL; 180519baf839SRobert Olsson struct fib_info *last_resort; 180619baf839SRobert Olsson struct fib_alias *fa = NULL; 180719baf839SRobert Olsson struct list_head *fa_head; 180819baf839SRobert Olsson struct leaf *l; 180919baf839SRobert Olsson 181019baf839SRobert Olsson last_idx = -1; 181119baf839SRobert Olsson last_resort = NULL; 181219baf839SRobert Olsson order = -1; 181319baf839SRobert Olsson 18142373ce1cSRobert Olsson rcu_read_lock(); 181519baf839SRobert Olsson 181619baf839SRobert Olsson l = fib_find_node(t, 0); 181719baf839SRobert Olsson if (!l) 181819baf839SRobert Olsson goto out; 181919baf839SRobert Olsson 182019baf839SRobert Olsson fa_head = get_fa_head(l, 0); 182119baf839SRobert Olsson if (!fa_head) 182219baf839SRobert Olsson goto out; 182319baf839SRobert Olsson 182419baf839SRobert Olsson if (list_empty(fa_head)) 182519baf839SRobert Olsson goto out; 182619baf839SRobert Olsson 18272373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fa_head, fa_list) { 182819baf839SRobert Olsson struct fib_info *next_fi = fa->fa_info; 182919baf839SRobert Olsson 183019baf839SRobert Olsson if (fa->fa_scope != res->scope || 183119baf839SRobert Olsson fa->fa_type != RTN_UNICAST) 183219baf839SRobert Olsson continue; 183319baf839SRobert Olsson 183419baf839SRobert Olsson if (next_fi->fib_priority > res->fi->fib_priority) 183519baf839SRobert Olsson break; 183619baf839SRobert Olsson if (!next_fi->fib_nh[0].nh_gw || 183719baf839SRobert Olsson next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 183819baf839SRobert Olsson continue; 183919baf839SRobert Olsson fa->fa_state |= FA_S_ACCESSED; 184019baf839SRobert Olsson 184119baf839SRobert Olsson if (fi == NULL) { 184219baf839SRobert Olsson if (next_fi != res->fi) 184319baf839SRobert Olsson break; 184419baf839SRobert Olsson } else if (!fib_detect_death(fi, order, &last_resort, 1845971b893eSDenis V. Lunev &last_idx, tb->tb_default)) { 1846a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1847971b893eSDenis V. Lunev tb->tb_default = order; 184819baf839SRobert Olsson goto out; 184919baf839SRobert Olsson } 185019baf839SRobert Olsson fi = next_fi; 185119baf839SRobert Olsson order++; 185219baf839SRobert Olsson } 185319baf839SRobert Olsson if (order <= 0 || fi == NULL) { 1854971b893eSDenis V. Lunev tb->tb_default = -1; 185519baf839SRobert Olsson goto out; 185619baf839SRobert Olsson } 185719baf839SRobert Olsson 1858971b893eSDenis V. Lunev if (!fib_detect_death(fi, order, &last_resort, &last_idx, 1859971b893eSDenis V. Lunev tb->tb_default)) { 1860a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1861971b893eSDenis V. Lunev tb->tb_default = order; 186219baf839SRobert Olsson goto out; 186319baf839SRobert Olsson } 1864a2bbe682SDenis V. Lunev if (last_idx >= 0) 1865a2bbe682SDenis V. Lunev fib_result_assign(res, last_resort); 1866971b893eSDenis V. Lunev tb->tb_default = last_idx; 1867971b893eSDenis V. Lunev out: 18682373ce1cSRobert Olsson rcu_read_unlock(); 186919baf839SRobert Olsson } 187019baf839SRobert Olsson 1871a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1872a07f5f50SStephen Hemminger struct fib_table *tb, 187319baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 187419baf839SRobert Olsson { 187519baf839SRobert Olsson int i, s_i; 187619baf839SRobert Olsson struct fib_alias *fa; 187719baf839SRobert Olsson 187832ab5f80SAl Viro __be32 xkey = htonl(key); 187919baf839SRobert Olsson 18801af5a8c4SPatrick McHardy s_i = cb->args[4]; 188119baf839SRobert Olsson i = 0; 188219baf839SRobert Olsson 18832373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 18842373ce1cSRobert Olsson 18852373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 188619baf839SRobert Olsson if (i < s_i) { 188719baf839SRobert Olsson i++; 188819baf839SRobert Olsson continue; 188919baf839SRobert Olsson } 189078c6671aSStephen Hemminger BUG_ON(!fa->fa_info); 189119baf839SRobert Olsson 189219baf839SRobert Olsson if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 189319baf839SRobert Olsson cb->nlh->nlmsg_seq, 189419baf839SRobert Olsson RTM_NEWROUTE, 189519baf839SRobert Olsson tb->tb_id, 189619baf839SRobert Olsson fa->fa_type, 189719baf839SRobert Olsson fa->fa_scope, 1898be403ea1SThomas Graf xkey, 189919baf839SRobert Olsson plen, 190019baf839SRobert Olsson fa->fa_tos, 190164347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 19021af5a8c4SPatrick McHardy cb->args[4] = i; 190319baf839SRobert Olsson return -1; 190419baf839SRobert Olsson } 190519baf839SRobert Olsson i++; 190619baf839SRobert Olsson } 19071af5a8c4SPatrick McHardy cb->args[4] = i; 190819baf839SRobert Olsson return skb->len; 190919baf839SRobert Olsson } 191019baf839SRobert Olsson 1911a07f5f50SStephen Hemminger static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, 1912a07f5f50SStephen Hemminger struct sk_buff *skb, struct netlink_callback *cb) 191319baf839SRobert Olsson { 191419baf839SRobert Olsson int h, s_h; 191519baf839SRobert Olsson struct list_head *fa_head; 191619baf839SRobert Olsson struct leaf *l = NULL; 191791b9a277SOlof Johansson 19181af5a8c4SPatrick McHardy s_h = cb->args[3]; 191919baf839SRobert Olsson 192019baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 192119baf839SRobert Olsson if (h < s_h) 192219baf839SRobert Olsson continue; 192319baf839SRobert Olsson if (h > s_h) 19241af5a8c4SPatrick McHardy memset(&cb->args[4], 0, 19251af5a8c4SPatrick McHardy sizeof(cb->args) - 4*sizeof(cb->args[0])); 192619baf839SRobert Olsson 192719baf839SRobert Olsson fa_head = get_fa_head(l, plen); 192819baf839SRobert Olsson 192919baf839SRobert Olsson if (!fa_head) 193019baf839SRobert Olsson continue; 193119baf839SRobert Olsson 193219baf839SRobert Olsson if (list_empty(fa_head)) 193319baf839SRobert Olsson continue; 193419baf839SRobert Olsson 193519baf839SRobert Olsson if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb) < 0) { 19361af5a8c4SPatrick McHardy cb->args[3] = h; 193719baf839SRobert Olsson return -1; 193819baf839SRobert Olsson } 193919baf839SRobert Olsson } 19401af5a8c4SPatrick McHardy cb->args[3] = h; 194119baf839SRobert Olsson return skb->len; 194219baf839SRobert Olsson } 194319baf839SRobert Olsson 1944a07f5f50SStephen Hemminger static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, 1945a07f5f50SStephen Hemminger struct netlink_callback *cb) 194619baf839SRobert Olsson { 194719baf839SRobert Olsson int m, s_m; 194819baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 194919baf839SRobert Olsson 19501af5a8c4SPatrick McHardy s_m = cb->args[2]; 195119baf839SRobert Olsson 19522373ce1cSRobert Olsson rcu_read_lock(); 195319baf839SRobert Olsson for (m = 0; m <= 32; m++) { 195419baf839SRobert Olsson if (m < s_m) 195519baf839SRobert Olsson continue; 195619baf839SRobert Olsson if (m > s_m) 19571af5a8c4SPatrick McHardy memset(&cb->args[3], 0, 19581af5a8c4SPatrick McHardy sizeof(cb->args) - 3*sizeof(cb->args[0])); 195919baf839SRobert Olsson 196019baf839SRobert Olsson if (fn_trie_dump_plen(t, 32-m, tb, skb, cb) < 0) { 19611af5a8c4SPatrick McHardy cb->args[2] = m; 196219baf839SRobert Olsson goto out; 196319baf839SRobert Olsson } 196419baf839SRobert Olsson } 19652373ce1cSRobert Olsson rcu_read_unlock(); 19661af5a8c4SPatrick McHardy cb->args[2] = m; 196719baf839SRobert Olsson return skb->len; 196819baf839SRobert Olsson out: 19692373ce1cSRobert Olsson rcu_read_unlock(); 197019baf839SRobert Olsson return -1; 197119baf839SRobert Olsson } 197219baf839SRobert Olsson 19737f9b8052SStephen Hemminger void __init fib_hash_init(void) 19747f9b8052SStephen Hemminger { 1975a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1976a07f5f50SStephen Hemminger sizeof(struct fib_alias), 1977bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 1978bc3c8c1eSStephen Hemminger 1979bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 1980bc3c8c1eSStephen Hemminger max(sizeof(struct leaf), 1981bc3c8c1eSStephen Hemminger sizeof(struct leaf_info)), 1982bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 19837f9b8052SStephen Hemminger } 198419baf839SRobert Olsson 19857f9b8052SStephen Hemminger 19867f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */ 19877f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id) 198819baf839SRobert Olsson { 198919baf839SRobert Olsson struct fib_table *tb; 199019baf839SRobert Olsson struct trie *t; 199119baf839SRobert Olsson 199219baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 199319baf839SRobert Olsson GFP_KERNEL); 199419baf839SRobert Olsson if (tb == NULL) 199519baf839SRobert Olsson return NULL; 199619baf839SRobert Olsson 199719baf839SRobert Olsson tb->tb_id = id; 1998971b893eSDenis V. Lunev tb->tb_default = -1; 199919baf839SRobert Olsson tb->tb_lookup = fn_trie_lookup; 200019baf839SRobert Olsson tb->tb_insert = fn_trie_insert; 200119baf839SRobert Olsson tb->tb_delete = fn_trie_delete; 200219baf839SRobert Olsson tb->tb_flush = fn_trie_flush; 200319baf839SRobert Olsson tb->tb_select_default = fn_trie_select_default; 200419baf839SRobert Olsson tb->tb_dump = fn_trie_dump; 200519baf839SRobert Olsson 200619baf839SRobert Olsson t = (struct trie *) tb->tb_data; 2007c28a1cf4SStephen Hemminger memset(t, 0, sizeof(*t)); 200819baf839SRobert Olsson 200919baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 2010a07f5f50SStephen Hemminger pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION); 201119baf839SRobert Olsson 201219baf839SRobert Olsson return tb; 201319baf839SRobert Olsson } 201419baf839SRobert Olsson 2015cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2016cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2017cb7b593cSStephen Hemminger struct fib_trie_iter { 20181c340b2fSDenis V. Lunev struct seq_net_private p; 2019877a9bffSEric W. Biederman struct trie *trie_local, *trie_main; 2020cb7b593cSStephen Hemminger struct tnode *tnode; 2021cb7b593cSStephen Hemminger struct trie *trie; 2022cb7b593cSStephen Hemminger unsigned index; 2023cb7b593cSStephen Hemminger unsigned depth; 2024cb7b593cSStephen Hemminger }; 202519baf839SRobert Olsson 2026cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 202719baf839SRobert Olsson { 2028cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 2029cb7b593cSStephen Hemminger unsigned cindex = iter->index; 2030cb7b593cSStephen Hemminger struct tnode *p; 203119baf839SRobert Olsson 20326640e697SEric W. Biederman /* A single entry routing table */ 20336640e697SEric W. Biederman if (!tn) 20346640e697SEric W. Biederman return NULL; 20356640e697SEric W. Biederman 2036cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2037cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 2038cb7b593cSStephen Hemminger rescan: 2039cb7b593cSStephen Hemminger while (cindex < (1<<tn->bits)) { 2040b59cfbf7SEric Dumazet struct node *n = tnode_get_child_rcu(tn, cindex); 204119baf839SRobert Olsson 2042cb7b593cSStephen Hemminger if (n) { 204319baf839SRobert Olsson if (IS_LEAF(n)) { 2044cb7b593cSStephen Hemminger iter->tnode = tn; 2045cb7b593cSStephen Hemminger iter->index = cindex + 1; 204691b9a277SOlof Johansson } else { 2047cb7b593cSStephen Hemminger /* push down one level */ 2048cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2049cb7b593cSStephen Hemminger iter->index = 0; 2050cb7b593cSStephen Hemminger ++iter->depth; 205119baf839SRobert Olsson } 2052cb7b593cSStephen Hemminger return n; 205319baf839SRobert Olsson } 205419baf839SRobert Olsson 2055cb7b593cSStephen Hemminger ++cindex; 2056cb7b593cSStephen Hemminger } 2057cb7b593cSStephen Hemminger 2058cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 2059b59cfbf7SEric Dumazet p = node_parent_rcu((struct node *)tn); 2060cb7b593cSStephen Hemminger if (p) { 2061cb7b593cSStephen Hemminger cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2062cb7b593cSStephen Hemminger tn = p; 2063cb7b593cSStephen Hemminger --iter->depth; 2064cb7b593cSStephen Hemminger goto rescan; 2065cb7b593cSStephen Hemminger } 2066cb7b593cSStephen Hemminger 2067cb7b593cSStephen Hemminger /* got root? */ 2068cb7b593cSStephen Hemminger return NULL; 2069cb7b593cSStephen Hemminger } 2070cb7b593cSStephen Hemminger 2071cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2072cb7b593cSStephen Hemminger struct trie *t) 2073cb7b593cSStephen Hemminger { 20745ddf0eb2SRobert Olsson struct node *n ; 20755ddf0eb2SRobert Olsson 20765ddf0eb2SRobert Olsson if (!t) 20775ddf0eb2SRobert Olsson return NULL; 20785ddf0eb2SRobert Olsson 20795ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 20805ddf0eb2SRobert Olsson 20815ddf0eb2SRobert Olsson if (!iter) 20825ddf0eb2SRobert Olsson return NULL; 2083cb7b593cSStephen Hemminger 20846640e697SEric W. Biederman if (n) { 20856640e697SEric W. Biederman if (IS_TNODE(n)) { 2086cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2087cb7b593cSStephen Hemminger iter->trie = t; 2088cb7b593cSStephen Hemminger iter->index = 0; 20891d25cd6cSRobert Olsson iter->depth = 1; 20906640e697SEric W. Biederman } else { 20916640e697SEric W. Biederman iter->tnode = NULL; 20926640e697SEric W. Biederman iter->trie = t; 20936640e697SEric W. Biederman iter->index = 0; 20946640e697SEric W. Biederman iter->depth = 0; 20956640e697SEric W. Biederman } 2096cb7b593cSStephen Hemminger return n; 2097cb7b593cSStephen Hemminger } 2098cb7b593cSStephen Hemminger return NULL; 2099cb7b593cSStephen Hemminger } 2100cb7b593cSStephen Hemminger 2101cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 210219baf839SRobert Olsson { 21032373ce1cSRobert Olsson struct node *n; 2104cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2105cb7b593cSStephen Hemminger 2106cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 210719baf839SRobert Olsson 21082373ce1cSRobert Olsson rcu_read_lock(); 2109cb7b593cSStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; 2110cb7b593cSStephen Hemminger n = fib_trie_get_next(&iter)) { 2111cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 211293672292SStephen Hemminger struct leaf *l = (struct leaf *)n; 211393672292SStephen Hemminger struct leaf_info *li; 211493672292SStephen Hemminger struct hlist_node *tmp; 211593672292SStephen Hemminger 211619baf839SRobert Olsson s->leaves++; 2117cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2118cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2119cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 212093672292SStephen Hemminger 212193672292SStephen Hemminger hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) 212293672292SStephen Hemminger ++s->prefixes; 212391b9a277SOlof Johansson } else { 2124cb7b593cSStephen Hemminger const struct tnode *tn = (const struct tnode *) n; 2125cb7b593cSStephen Hemminger int i; 212619baf839SRobert Olsson 212719baf839SRobert Olsson s->tnodes++; 212806ef921dSRobert Olsson if (tn->bits < MAX_STAT_DEPTH) 212919baf839SRobert Olsson s->nodesizes[tn->bits]++; 213006ef921dSRobert Olsson 2131cb7b593cSStephen Hemminger for (i = 0; i < (1<<tn->bits); i++) 2132cb7b593cSStephen Hemminger if (!tn->child[i]) 213319baf839SRobert Olsson s->nullpointers++; 213419baf839SRobert Olsson } 213519baf839SRobert Olsson } 21362373ce1cSRobert Olsson rcu_read_unlock(); 213719baf839SRobert Olsson } 213819baf839SRobert Olsson 213919baf839SRobert Olsson /* 214019baf839SRobert Olsson * This outputs /proc/net/fib_triestats 214119baf839SRobert Olsson */ 2142cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 214319baf839SRobert Olsson { 2144cb7b593cSStephen Hemminger unsigned i, max, pointers, bytes, avdepth; 214519baf839SRobert Olsson 214619baf839SRobert Olsson if (stat->leaves) 214719baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 214819baf839SRobert Olsson else 214919baf839SRobert Olsson avdepth = 0; 215019baf839SRobert Olsson 2151a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2152a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2153cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2154cb7b593cSStephen Hemminger 2155cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2156cb7b593cSStephen Hemminger bytes = sizeof(struct leaf) * stat->leaves; 215793672292SStephen Hemminger 215893672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 215993672292SStephen Hemminger bytes += sizeof(struct leaf_info) * stat->prefixes; 216093672292SStephen Hemminger 2161187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 216219baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 216319baf839SRobert Olsson 216406ef921dSRobert Olsson max = MAX_STAT_DEPTH; 216506ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 216619baf839SRobert Olsson max--; 216719baf839SRobert Olsson 2168cb7b593cSStephen Hemminger pointers = 0; 216919baf839SRobert Olsson for (i = 1; i <= max; i++) 217019baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2171187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 217219baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 217319baf839SRobert Olsson } 2174cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2175187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2176cb7b593cSStephen Hemminger 217719baf839SRobert Olsson bytes += sizeof(struct node *) * pointers; 2178187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2179187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 218066a2f7fdSStephen Hemminger } 218119baf839SRobert Olsson 218219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 218366a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 218466a2f7fdSStephen Hemminger const struct trie_use_stats *stats) 218566a2f7fdSStephen Hemminger { 218666a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 218766a2f7fdSStephen Hemminger seq_printf(seq, "gets = %u\n", stats->gets); 218866a2f7fdSStephen Hemminger seq_printf(seq, "backtracks = %u\n", stats->backtrack); 2189a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 2190a07f5f50SStephen Hemminger stats->semantic_match_passed); 2191a07f5f50SStephen Hemminger seq_printf(seq, "semantic match miss = %u\n", 2192a07f5f50SStephen Hemminger stats->semantic_match_miss); 219366a2f7fdSStephen Hemminger seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); 2194a07f5f50SStephen Hemminger seq_printf(seq, "skipped node resize = %u\n\n", 2195a07f5f50SStephen Hemminger stats->resize_node_skipped); 219619baf839SRobert Olsson } 219766a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 219866a2f7fdSStephen Hemminger 2199a07f5f50SStephen Hemminger static void fib_trie_show(struct seq_file *seq, const char *name, 2200a07f5f50SStephen Hemminger struct trie *trie) 2201d717a9a6SStephen Hemminger { 2202d717a9a6SStephen Hemminger struct trie_stat stat; 2203d717a9a6SStephen Hemminger 2204d717a9a6SStephen Hemminger trie_collect_stats(trie, &stat); 220593672292SStephen Hemminger seq_printf(seq, "%s:\n", name); 2206d717a9a6SStephen Hemminger trie_show_stats(seq, &stat); 2207d717a9a6SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 2208d717a9a6SStephen Hemminger trie_show_usage(seq, &trie->stats); 2209d717a9a6SStephen Hemminger #endif 2210d717a9a6SStephen Hemminger } 221119baf839SRobert Olsson 221219baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 221319baf839SRobert Olsson { 22141c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 2215877a9bffSEric W. Biederman struct fib_table *tb; 2216877a9bffSEric W. Biederman 2217d717a9a6SStephen Hemminger seq_printf(seq, 2218a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2219a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 222019baf839SRobert Olsson sizeof(struct leaf), sizeof(struct tnode)); 222119baf839SRobert Olsson 2222d717a9a6SStephen Hemminger tb = fib_get_table(net, RT_TABLE_LOCAL); 2223d717a9a6SStephen Hemminger if (tb) 2224d717a9a6SStephen Hemminger fib_trie_show(seq, "Local", (struct trie *) tb->tb_data); 2225cb7b593cSStephen Hemminger 2226d717a9a6SStephen Hemminger tb = fib_get_table(net, RT_TABLE_MAIN); 2227d717a9a6SStephen Hemminger if (tb) 2228d717a9a6SStephen Hemminger fib_trie_show(seq, "Main", (struct trie *) tb->tb_data); 2229cb7b593cSStephen Hemminger 223019baf839SRobert Olsson return 0; 223119baf839SRobert Olsson } 223219baf839SRobert Olsson 223319baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 223419baf839SRobert Olsson { 22351c340b2fSDenis V. Lunev int err; 22361c340b2fSDenis V. Lunev struct net *net; 22371c340b2fSDenis V. Lunev 22381c340b2fSDenis V. Lunev net = get_proc_net(inode); 22391c340b2fSDenis V. Lunev if (net == NULL) 22401c340b2fSDenis V. Lunev return -ENXIO; 22411c340b2fSDenis V. Lunev err = single_open(file, fib_triestat_seq_show, net); 22421c340b2fSDenis V. Lunev if (err < 0) { 22431c340b2fSDenis V. Lunev put_net(net); 22441c340b2fSDenis V. Lunev return err; 22451c340b2fSDenis V. Lunev } 22461c340b2fSDenis V. Lunev return 0; 22471c340b2fSDenis V. Lunev } 22481c340b2fSDenis V. Lunev 22491c340b2fSDenis V. Lunev static int fib_triestat_seq_release(struct inode *ino, struct file *f) 22501c340b2fSDenis V. Lunev { 22511c340b2fSDenis V. Lunev struct seq_file *seq = f->private_data; 22521c340b2fSDenis V. Lunev put_net(seq->private); 22531c340b2fSDenis V. Lunev return single_release(ino, f); 225419baf839SRobert Olsson } 225519baf839SRobert Olsson 22569a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 225719baf839SRobert Olsson .owner = THIS_MODULE, 225819baf839SRobert Olsson .open = fib_triestat_seq_open, 225919baf839SRobert Olsson .read = seq_read, 226019baf839SRobert Olsson .llseek = seq_lseek, 22611c340b2fSDenis V. Lunev .release = fib_triestat_seq_release, 226219baf839SRobert Olsson }; 226319baf839SRobert Olsson 2264cb7b593cSStephen Hemminger static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2265cb7b593cSStephen Hemminger loff_t pos) 226619baf839SRobert Olsson { 2267cb7b593cSStephen Hemminger loff_t idx = 0; 2268cb7b593cSStephen Hemminger struct node *n; 2269cb7b593cSStephen Hemminger 2270877a9bffSEric W. Biederman for (n = fib_trie_get_first(iter, iter->trie_local); 2271cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2272cb7b593cSStephen Hemminger if (pos == idx) 2273cb7b593cSStephen Hemminger return n; 227419baf839SRobert Olsson } 227519baf839SRobert Olsson 2276877a9bffSEric W. Biederman for (n = fib_trie_get_first(iter, iter->trie_main); 2277cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2278cb7b593cSStephen Hemminger if (pos == idx) 2279cb7b593cSStephen Hemminger return n; 228019baf839SRobert Olsson } 228119baf839SRobert Olsson return NULL; 228219baf839SRobert Olsson } 228319baf839SRobert Olsson 228419baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2285c95aaf9aSStephen Hemminger __acquires(RCU) 228619baf839SRobert Olsson { 2287877a9bffSEric W. Biederman struct fib_trie_iter *iter = seq->private; 2288877a9bffSEric W. Biederman struct fib_table *tb; 2289877a9bffSEric W. Biederman 2290877a9bffSEric W. Biederman if (!iter->trie_local) { 22911c340b2fSDenis V. Lunev tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL); 2292877a9bffSEric W. Biederman if (tb) 2293877a9bffSEric W. Biederman iter->trie_local = (struct trie *) tb->tb_data; 2294877a9bffSEric W. Biederman } 2295877a9bffSEric W. Biederman if (!iter->trie_main) { 22961c340b2fSDenis V. Lunev tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); 2297877a9bffSEric W. Biederman if (tb) 2298877a9bffSEric W. Biederman iter->trie_main = (struct trie *) tb->tb_data; 2299877a9bffSEric W. Biederman } 2300cb7b593cSStephen Hemminger rcu_read_lock(); 2301cb7b593cSStephen Hemminger if (*pos == 0) 230291b9a277SOlof Johansson return SEQ_START_TOKEN; 2303877a9bffSEric W. Biederman return fib_trie_get_idx(iter, *pos - 1); 230419baf839SRobert Olsson } 230519baf839SRobert Olsson 230619baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 230719baf839SRobert Olsson { 2308cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 2309cb7b593cSStephen Hemminger void *l = v; 2310cb7b593cSStephen Hemminger 231119baf839SRobert Olsson ++*pos; 231291b9a277SOlof Johansson if (v == SEQ_START_TOKEN) 2313cb7b593cSStephen Hemminger return fib_trie_get_idx(iter, 0); 231491b9a277SOlof Johansson 2315cb7b593cSStephen Hemminger v = fib_trie_get_next(iter); 2316cb7b593cSStephen Hemminger BUG_ON(v == l); 2317cb7b593cSStephen Hemminger if (v) 2318cb7b593cSStephen Hemminger return v; 2319cb7b593cSStephen Hemminger 2320cb7b593cSStephen Hemminger /* continue scan in next trie */ 2321877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2322877a9bffSEric W. Biederman return fib_trie_get_first(iter, iter->trie_main); 2323cb7b593cSStephen Hemminger 2324cb7b593cSStephen Hemminger return NULL; 232519baf839SRobert Olsson } 232619baf839SRobert Olsson 232719baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2328c95aaf9aSStephen Hemminger __releases(RCU) 232919baf839SRobert Olsson { 2330cb7b593cSStephen Hemminger rcu_read_unlock(); 233119baf839SRobert Olsson } 233219baf839SRobert Olsson 2333cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2334cb7b593cSStephen Hemminger { 2335cb7b593cSStephen Hemminger while (n-- > 0) seq_puts(seq, " "); 2336cb7b593cSStephen Hemminger } 233719baf839SRobert Olsson 233828d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2339cb7b593cSStephen Hemminger { 2340cb7b593cSStephen Hemminger switch (s) { 2341cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2342cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2343cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2344cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2345cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2346cb7b593cSStephen Hemminger default: 234728d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2348cb7b593cSStephen Hemminger return buf; 2349cb7b593cSStephen Hemminger } 2350cb7b593cSStephen Hemminger } 2351cb7b593cSStephen Hemminger 2352cb7b593cSStephen Hemminger static const char *rtn_type_names[__RTN_MAX] = { 2353cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2354cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2355cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2356cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2357cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2358cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2359cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2360cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2361cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2362cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2363cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2364cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2365cb7b593cSStephen Hemminger }; 2366cb7b593cSStephen Hemminger 236728d36e37SEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2368cb7b593cSStephen Hemminger { 2369cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2370cb7b593cSStephen Hemminger return rtn_type_names[t]; 237128d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2372cb7b593cSStephen Hemminger return buf; 2373cb7b593cSStephen Hemminger } 2374cb7b593cSStephen Hemminger 2375cb7b593cSStephen Hemminger /* Pretty print the trie */ 237619baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 237719baf839SRobert Olsson { 2378cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2379cb7b593cSStephen Hemminger struct node *n = v; 238019baf839SRobert Olsson 2381cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) 2382cb7b593cSStephen Hemminger return 0; 238319baf839SRobert Olsson 2384b59cfbf7SEric Dumazet if (!node_parent_rcu(n)) { 2385877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2386cb7b593cSStephen Hemminger seq_puts(seq, "<local>:\n"); 2387cb7b593cSStephen Hemminger else 2388cb7b593cSStephen Hemminger seq_puts(seq, "<main>:\n"); 2389cb7b593cSStephen Hemminger } 2390095b8501SRobert Olsson 2391095b8501SRobert Olsson if (IS_TNODE(n)) { 2392095b8501SRobert Olsson struct tnode *tn = (struct tnode *) n; 2393ab66b4a7SStephen Hemminger __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2394095b8501SRobert Olsson 23951d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 23961d25cd6cSRobert Olsson seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", 23971d25cd6cSRobert Olsson NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 23981d25cd6cSRobert Olsson tn->empty_children); 23991d25cd6cSRobert Olsson 2400cb7b593cSStephen Hemminger } else { 2401cb7b593cSStephen Hemminger struct leaf *l = (struct leaf *) n; 24021328042eSStephen Hemminger struct leaf_info *li; 24031328042eSStephen Hemminger struct hlist_node *node; 24041328042eSStephen Hemminger 240532ab5f80SAl Viro __be32 val = htonl(l->key); 2406cb7b593cSStephen Hemminger 2407cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2408cb7b593cSStephen Hemminger seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 240928d36e37SEric Dumazet 24101328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2411cb7b593cSStephen Hemminger struct fib_alias *fa; 241228d36e37SEric Dumazet 2413cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 241428d36e37SEric Dumazet char buf1[32], buf2[32]; 241528d36e37SEric Dumazet 2416cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 24171328042eSStephen Hemminger seq_printf(seq, " /%d %s %s", li->plen, 241828d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 241928d36e37SEric Dumazet fa->fa_scope), 242028d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 242128d36e37SEric Dumazet fa->fa_type)); 2422cb7b593cSStephen Hemminger if (fa->fa_tos) 2423cb7b593cSStephen Hemminger seq_printf(seq, "tos =%d\n", 2424cb7b593cSStephen Hemminger fa->fa_tos); 2425cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2426cb7b593cSStephen Hemminger } 2427cb7b593cSStephen Hemminger } 2428cb7b593cSStephen Hemminger } 242919baf839SRobert Olsson 243019baf839SRobert Olsson return 0; 243119baf839SRobert Olsson } 243219baf839SRobert Olsson 2433f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 243419baf839SRobert Olsson .start = fib_trie_seq_start, 243519baf839SRobert Olsson .next = fib_trie_seq_next, 243619baf839SRobert Olsson .stop = fib_trie_seq_stop, 243719baf839SRobert Olsson .show = fib_trie_seq_show, 243819baf839SRobert Olsson }; 243919baf839SRobert Olsson 244019baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 244119baf839SRobert Olsson { 24421c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2443cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 244419baf839SRobert Olsson } 244519baf839SRobert Olsson 24469a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 244719baf839SRobert Olsson .owner = THIS_MODULE, 244819baf839SRobert Olsson .open = fib_trie_seq_open, 244919baf839SRobert Olsson .read = seq_read, 245019baf839SRobert Olsson .llseek = seq_lseek, 24511c340b2fSDenis V. Lunev .release = seq_release_net, 245219baf839SRobert Olsson }; 245319baf839SRobert Olsson 245432ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2455cb7b593cSStephen Hemminger { 2456cb7b593cSStephen Hemminger static unsigned type2flags[RTN_MAX + 1] = { 2457cb7b593cSStephen Hemminger [7] = RTF_REJECT, [8] = RTF_REJECT, 2458cb7b593cSStephen Hemminger }; 2459cb7b593cSStephen Hemminger unsigned flags = type2flags[type]; 2460cb7b593cSStephen Hemminger 2461cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2462cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 246332ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2464cb7b593cSStephen Hemminger flags |= RTF_HOST; 2465cb7b593cSStephen Hemminger flags |= RTF_UP; 2466cb7b593cSStephen Hemminger return flags; 2467cb7b593cSStephen Hemminger } 2468cb7b593cSStephen Hemminger 2469cb7b593cSStephen Hemminger /* 2470cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2471cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2472cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2473cb7b593cSStephen Hemminger * legacy utilities 2474cb7b593cSStephen Hemminger */ 2475cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2476cb7b593cSStephen Hemminger { 2477c9e53cbeSPatrick McHardy const struct fib_trie_iter *iter = seq->private; 2478cb7b593cSStephen Hemminger struct leaf *l = v; 24791328042eSStephen Hemminger struct leaf_info *li; 24801328042eSStephen Hemminger struct hlist_node *node; 2481cb7b593cSStephen Hemminger 2482cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2483cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2484cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2485cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2486cb7b593cSStephen Hemminger return 0; 2487cb7b593cSStephen Hemminger } 2488cb7b593cSStephen Hemminger 2489877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2490c9e53cbeSPatrick McHardy return 0; 2491a07f5f50SStephen Hemminger 2492cb7b593cSStephen Hemminger if (IS_TNODE(l)) 2493cb7b593cSStephen Hemminger return 0; 2494cb7b593cSStephen Hemminger 24951328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2496cb7b593cSStephen Hemminger struct fib_alias *fa; 249732ab5f80SAl Viro __be32 mask, prefix; 2498cb7b593cSStephen Hemminger 2499cb7b593cSStephen Hemminger if (!li) 2500cb7b593cSStephen Hemminger continue; 2501cb7b593cSStephen Hemminger 2502cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2503cb7b593cSStephen Hemminger prefix = htonl(l->key); 2504cb7b593cSStephen Hemminger 2505cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 25061371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2507cb7b593cSStephen Hemminger unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 25081328042eSStephen Hemminger char bf[128]; 2509cb7b593cSStephen Hemminger 2510cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2511cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2512cb7b593cSStephen Hemminger continue; 2513cb7b593cSStephen Hemminger 2514cb7b593cSStephen Hemminger if (fi) 2515cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2516cb7b593cSStephen Hemminger "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2517cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2518cb7b593cSStephen Hemminger prefix, 2519cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2520cb7b593cSStephen Hemminger fi->fib_priority, 2521cb7b593cSStephen Hemminger mask, 2522a07f5f50SStephen Hemminger (fi->fib_advmss ? 2523a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2524cb7b593cSStephen Hemminger fi->fib_window, 2525cb7b593cSStephen Hemminger fi->fib_rtt >> 3); 2526cb7b593cSStephen Hemminger else 2527cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2528cb7b593cSStephen Hemminger "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2529cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2530cb7b593cSStephen Hemminger mask, 0, 0, 0); 2531cb7b593cSStephen Hemminger 2532cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", bf); 2533cb7b593cSStephen Hemminger } 2534cb7b593cSStephen Hemminger } 2535cb7b593cSStephen Hemminger 2536cb7b593cSStephen Hemminger return 0; 2537cb7b593cSStephen Hemminger } 2538cb7b593cSStephen Hemminger 2539f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 2540cb7b593cSStephen Hemminger .start = fib_trie_seq_start, 2541cb7b593cSStephen Hemminger .next = fib_trie_seq_next, 2542cb7b593cSStephen Hemminger .stop = fib_trie_seq_stop, 2543cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2544cb7b593cSStephen Hemminger }; 2545cb7b593cSStephen Hemminger 2546cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2547cb7b593cSStephen Hemminger { 25481c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 2549cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 2550cb7b593cSStephen Hemminger } 2551cb7b593cSStephen Hemminger 25529a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2553cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2554cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2555cb7b593cSStephen Hemminger .read = seq_read, 2556cb7b593cSStephen Hemminger .llseek = seq_lseek, 25571c340b2fSDenis V. Lunev .release = seq_release_net, 2558cb7b593cSStephen Hemminger }; 2559cb7b593cSStephen Hemminger 256061a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 256119baf839SRobert Olsson { 256261a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2563cb7b593cSStephen Hemminger goto out1; 2564cb7b593cSStephen Hemminger 256561a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, 256661a02653SDenis V. Lunev &fib_triestat_fops)) 2567cb7b593cSStephen Hemminger goto out2; 2568cb7b593cSStephen Hemminger 256961a02653SDenis V. Lunev if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) 2570cb7b593cSStephen Hemminger goto out3; 2571cb7b593cSStephen Hemminger 257219baf839SRobert Olsson return 0; 2573cb7b593cSStephen Hemminger 2574cb7b593cSStephen Hemminger out3: 257561a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 2576cb7b593cSStephen Hemminger out2: 257761a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 2578cb7b593cSStephen Hemminger out1: 2579cb7b593cSStephen Hemminger return -ENOMEM; 258019baf839SRobert Olsson } 258119baf839SRobert Olsson 258261a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 258319baf839SRobert Olsson { 258461a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 258561a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 258661a02653SDenis V. Lunev proc_net_remove(net, "route"); 258719baf839SRobert Olsson } 258819baf839SRobert Olsson 258919baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2590