119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1519baf839SRobert Olsson * This work is based on the LPC-trie which is originally descibed in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 1919baf839SRobert Olsson * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ 2619baf839SRobert Olsson * 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2919baf839SRobert Olsson * 3019baf839SRobert Olsson * 3119baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3219baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3319baf839SRobert Olsson * interface as the means of communication with the user level. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3619baf839SRobert Olsson * 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3919baf839SRobert Olsson * 4019baf839SRobert Olsson * This program is free software; you can redistribute it and/or 4119baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4219baf839SRobert Olsson * as published by the Free Software Foundation; either version 4319baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * Substantial contributions to this work comes from: 46fd966255SRobert Olsson * 47fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 48fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 49fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 50fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 5119baf839SRobert Olsson */ 5219baf839SRobert Olsson 53550e29bcSRobert Olsson #define VERSION "0.407" 5419baf839SRobert Olsson 5519baf839SRobert Olsson #include <asm/uaccess.h> 5619baf839SRobert Olsson #include <asm/system.h> 5719baf839SRobert Olsson #include <asm/bitops.h> 5819baf839SRobert Olsson #include <linux/types.h> 5919baf839SRobert Olsson #include <linux/kernel.h> 6019baf839SRobert Olsson #include <linux/sched.h> 6119baf839SRobert Olsson #include <linux/mm.h> 6219baf839SRobert Olsson #include <linux/string.h> 6319baf839SRobert Olsson #include <linux/socket.h> 6419baf839SRobert Olsson #include <linux/sockios.h> 6519baf839SRobert Olsson #include <linux/errno.h> 6619baf839SRobert Olsson #include <linux/in.h> 6719baf839SRobert Olsson #include <linux/inet.h> 68cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6919baf839SRobert Olsson #include <linux/netdevice.h> 7019baf839SRobert Olsson #include <linux/if_arp.h> 7119baf839SRobert Olsson #include <linux/proc_fs.h> 722373ce1cSRobert Olsson #include <linux/rcupdate.h> 7319baf839SRobert Olsson #include <linux/skbuff.h> 7419baf839SRobert Olsson #include <linux/netlink.h> 7519baf839SRobert Olsson #include <linux/init.h> 7619baf839SRobert Olsson #include <linux/list.h> 7719baf839SRobert Olsson #include <net/ip.h> 7819baf839SRobert Olsson #include <net/protocol.h> 7919baf839SRobert Olsson #include <net/route.h> 8019baf839SRobert Olsson #include <net/tcp.h> 8119baf839SRobert Olsson #include <net/sock.h> 8219baf839SRobert Olsson #include <net/ip_fib.h> 8319baf839SRobert Olsson #include "fib_lookup.h" 8419baf839SRobert Olsson 8519baf839SRobert Olsson #undef CONFIG_IP_FIB_TRIE_STATS 8606ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8719baf839SRobert Olsson 8819baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8919baf839SRobert Olsson #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) 9019baf839SRobert Olsson #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) 9119baf839SRobert Olsson 9219baf839SRobert Olsson typedef unsigned int t_key; 9319baf839SRobert Olsson 9419baf839SRobert Olsson #define T_TNODE 0 9519baf839SRobert Olsson #define T_LEAF 1 9619baf839SRobert Olsson #define NODE_TYPE_MASK 0x1UL 9791b9a277SOlof Johansson #define NODE_PARENT(node) \ 982373ce1cSRobert Olsson ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) 992373ce1cSRobert Olsson 1002373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) 1012373ce1cSRobert Olsson 10291b9a277SOlof Johansson #define NODE_SET_PARENT(node, ptr) \ 1032373ce1cSRobert Olsson rcu_assign_pointer((node)->parent, \ 1042373ce1cSRobert Olsson ((unsigned long)(ptr)) | NODE_TYPE(node)) 10519baf839SRobert Olsson 10691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF)) 10791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF) 10819baf839SRobert Olsson 10919baf839SRobert Olsson struct node { 11019baf839SRobert Olsson t_key key; 11191b9a277SOlof Johansson unsigned long parent; 11219baf839SRobert Olsson }; 11319baf839SRobert Olsson 11419baf839SRobert Olsson struct leaf { 11519baf839SRobert Olsson t_key key; 11691b9a277SOlof Johansson unsigned long parent; 11719baf839SRobert Olsson struct hlist_head list; 1182373ce1cSRobert Olsson struct rcu_head rcu; 11919baf839SRobert Olsson }; 12019baf839SRobert Olsson 12119baf839SRobert Olsson struct leaf_info { 12219baf839SRobert Olsson struct hlist_node hlist; 1232373ce1cSRobert Olsson struct rcu_head rcu; 12419baf839SRobert Olsson int plen; 12519baf839SRobert Olsson struct list_head falh; 12619baf839SRobert Olsson }; 12719baf839SRobert Olsson 12819baf839SRobert Olsson struct tnode { 12919baf839SRobert Olsson t_key key; 13091b9a277SOlof Johansson unsigned long parent; 13119baf839SRobert Olsson unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 13219baf839SRobert Olsson unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 13319baf839SRobert Olsson unsigned short full_children; /* KEYLENGTH bits needed */ 13419baf839SRobert Olsson unsigned short empty_children; /* KEYLENGTH bits needed */ 1352373ce1cSRobert Olsson struct rcu_head rcu; 13619baf839SRobert Olsson struct node *child[0]; 13719baf839SRobert Olsson }; 13819baf839SRobert Olsson 13919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14019baf839SRobert Olsson struct trie_use_stats { 14119baf839SRobert Olsson unsigned int gets; 14219baf839SRobert Olsson unsigned int backtrack; 14319baf839SRobert Olsson unsigned int semantic_match_passed; 14419baf839SRobert Olsson unsigned int semantic_match_miss; 14519baf839SRobert Olsson unsigned int null_node_hit; 1462f36895aSRobert Olsson unsigned int resize_node_skipped; 14719baf839SRobert Olsson }; 14819baf839SRobert Olsson #endif 14919baf839SRobert Olsson 15019baf839SRobert Olsson struct trie_stat { 15119baf839SRobert Olsson unsigned int totdepth; 15219baf839SRobert Olsson unsigned int maxdepth; 15319baf839SRobert Olsson unsigned int tnodes; 15419baf839SRobert Olsson unsigned int leaves; 15519baf839SRobert Olsson unsigned int nullpointers; 15606ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 15719baf839SRobert Olsson }; 15819baf839SRobert Olsson 15919baf839SRobert Olsson struct trie { 16019baf839SRobert Olsson struct node *trie; 16119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 16219baf839SRobert Olsson struct trie_use_stats stats; 16319baf839SRobert Olsson #endif 16419baf839SRobert Olsson int size; 16519baf839SRobert Olsson unsigned int revision; 16619baf839SRobert Olsson }; 16719baf839SRobert Olsson 16819baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 16919baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 17019baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn); 1712f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn); 1722f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn); 17319baf839SRobert Olsson static void tnode_free(struct tnode *tn); 17419baf839SRobert Olsson 175e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 17619baf839SRobert Olsson static struct trie *trie_local = NULL, *trie_main = NULL; 17719baf839SRobert Olsson 1782373ce1cSRobert Olsson 1792373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 1802373ce1cSRobert Olsson 18119baf839SRobert Olsson static inline struct node *tnode_get_child(struct tnode *tn, int i) 18219baf839SRobert Olsson { 18391b9a277SOlof Johansson BUG_ON(i >= 1 << tn->bits); 18419baf839SRobert Olsson 1852373ce1cSRobert Olsson return rcu_dereference(tn->child[i]); 18619baf839SRobert Olsson } 18719baf839SRobert Olsson 188bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn) 18919baf839SRobert Olsson { 19019baf839SRobert Olsson return 1 << tn->bits; 19119baf839SRobert Olsson } 19219baf839SRobert Olsson 19319baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 19419baf839SRobert Olsson { 19519baf839SRobert Olsson if (offset < KEYLENGTH) 19619baf839SRobert Olsson return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 19719baf839SRobert Olsson else 19819baf839SRobert Olsson return 0; 19919baf839SRobert Olsson } 20019baf839SRobert Olsson 20119baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b) 20219baf839SRobert Olsson { 20319baf839SRobert Olsson return a == b; 20419baf839SRobert Olsson } 20519baf839SRobert Olsson 20619baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 20719baf839SRobert Olsson { 20819baf839SRobert Olsson if (bits == 0 || offset >= KEYLENGTH) 20919baf839SRobert Olsson return 1; 21019baf839SRobert Olsson bits = bits > KEYLENGTH ? KEYLENGTH : bits; 21119baf839SRobert Olsson return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 21219baf839SRobert Olsson } 21319baf839SRobert Olsson 21419baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b) 21519baf839SRobert Olsson { 21619baf839SRobert Olsson t_key diff = a ^ b; 21719baf839SRobert Olsson int i = offset; 21819baf839SRobert Olsson 21919baf839SRobert Olsson if (!diff) 22019baf839SRobert Olsson return 0; 22119baf839SRobert Olsson while ((diff << i) >> (KEYLENGTH-1) == 0) 22219baf839SRobert Olsson i++; 22319baf839SRobert Olsson return i; 22419baf839SRobert Olsson } 22519baf839SRobert Olsson 22619baf839SRobert Olsson /* 22719baf839SRobert Olsson To understand this stuff, an understanding of keys and all their bits is 22819baf839SRobert Olsson necessary. Every node in the trie has a key associated with it, but not 22919baf839SRobert Olsson all of the bits in that key are significant. 23019baf839SRobert Olsson 23119baf839SRobert Olsson Consider a node 'n' and its parent 'tp'. 23219baf839SRobert Olsson 23319baf839SRobert Olsson If n is a leaf, every bit in its key is significant. Its presence is 234772cb712SRobert Olsson necessitated by path compression, since during a tree traversal (when 23519baf839SRobert Olsson searching for a leaf - unless we are doing an insertion) we will completely 23619baf839SRobert Olsson ignore all skipped bits we encounter. Thus we need to verify, at the end of 23719baf839SRobert Olsson a potentially successful search, that we have indeed been walking the 23819baf839SRobert Olsson correct key path. 23919baf839SRobert Olsson 24019baf839SRobert Olsson Note that we can never "miss" the correct key in the tree if present by 24119baf839SRobert Olsson following the wrong path. Path compression ensures that segments of the key 24219baf839SRobert Olsson that are the same for all keys with a given prefix are skipped, but the 24319baf839SRobert Olsson skipped part *is* identical for each node in the subtrie below the skipped 24419baf839SRobert Olsson bit! trie_insert() in this implementation takes care of that - note the 24519baf839SRobert Olsson call to tkey_sub_equals() in trie_insert(). 24619baf839SRobert Olsson 24719baf839SRobert Olsson if n is an internal node - a 'tnode' here, the various parts of its key 24819baf839SRobert Olsson have many different meanings. 24919baf839SRobert Olsson 25019baf839SRobert Olsson Example: 25119baf839SRobert Olsson _________________________________________________________________ 25219baf839SRobert Olsson | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 25319baf839SRobert Olsson ----------------------------------------------------------------- 25419baf839SRobert Olsson 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 25519baf839SRobert Olsson 25619baf839SRobert Olsson _________________________________________________________________ 25719baf839SRobert Olsson | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 25819baf839SRobert Olsson ----------------------------------------------------------------- 25919baf839SRobert Olsson 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 26019baf839SRobert Olsson 26119baf839SRobert Olsson tp->pos = 7 26219baf839SRobert Olsson tp->bits = 3 26319baf839SRobert Olsson n->pos = 15 26419baf839SRobert Olsson n->bits = 4 26519baf839SRobert Olsson 26619baf839SRobert Olsson First, let's just ignore the bits that come before the parent tp, that is 26719baf839SRobert Olsson the bits from 0 to (tp->pos-1). They are *known* but at this point we do 26819baf839SRobert Olsson not use them for anything. 26919baf839SRobert Olsson 27019baf839SRobert Olsson The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 27119baf839SRobert Olsson index into the parent's child array. That is, they will be used to find 27219baf839SRobert Olsson 'n' among tp's children. 27319baf839SRobert Olsson 27419baf839SRobert Olsson The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 27519baf839SRobert Olsson for the node n. 27619baf839SRobert Olsson 27719baf839SRobert Olsson All the bits we have seen so far are significant to the node n. The rest 27819baf839SRobert Olsson of the bits are really not needed or indeed known in n->key. 27919baf839SRobert Olsson 28019baf839SRobert Olsson The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 28119baf839SRobert Olsson n's child array, and will of course be different for each child. 28219baf839SRobert Olsson 283c877efb2SStephen Hemminger 28419baf839SRobert Olsson The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 28519baf839SRobert Olsson at this point. 28619baf839SRobert Olsson 28719baf839SRobert Olsson */ 28819baf839SRobert Olsson 2890c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn) 29019baf839SRobert Olsson { 2910c7770c7SStephen Hemminger WARN_ON(tn && tn->pos+tn->bits > 32); 29219baf839SRobert Olsson } 29319baf839SRobert Olsson 29419baf839SRobert Olsson static int halve_threshold = 25; 29519baf839SRobert Olsson static int inflate_threshold = 50; 296e6308be8SRobert Olsson static int halve_threshold_root = 15; 297e6308be8SRobert Olsson static int inflate_threshold_root = 25; 29819baf839SRobert Olsson 2992373ce1cSRobert Olsson 3002373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3012373ce1cSRobert Olsson { 3022373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3032373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3042373ce1cSRobert Olsson } 3052373ce1cSRobert Olsson 3062373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3072373ce1cSRobert Olsson { 3082373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3092373ce1cSRobert Olsson } 3102373ce1cSRobert Olsson 3112373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head) 3122373ce1cSRobert Olsson { 3132373ce1cSRobert Olsson kfree(container_of(head, struct leaf, rcu)); 3142373ce1cSRobert Olsson } 3152373ce1cSRobert Olsson 3162373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head) 3172373ce1cSRobert Olsson { 3182373ce1cSRobert Olsson kfree(container_of(head, struct leaf_info, rcu)); 3192373ce1cSRobert Olsson } 3202373ce1cSRobert Olsson 3212373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 3222373ce1cSRobert Olsson { 3232373ce1cSRobert Olsson call_rcu(&leaf->rcu, __leaf_info_free_rcu); 3242373ce1cSRobert Olsson } 3252373ce1cSRobert Olsson 3262373ce1cSRobert Olsson static struct tnode *tnode_alloc(unsigned int size) 3272373ce1cSRobert Olsson { 3282373ce1cSRobert Olsson struct page *pages; 3292373ce1cSRobert Olsson 3302373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3312373ce1cSRobert Olsson return kcalloc(size, 1, GFP_KERNEL); 3322373ce1cSRobert Olsson 3332373ce1cSRobert Olsson pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 3342373ce1cSRobert Olsson if (!pages) 3352373ce1cSRobert Olsson return NULL; 3362373ce1cSRobert Olsson 3372373ce1cSRobert Olsson return page_address(pages); 3382373ce1cSRobert Olsson } 3392373ce1cSRobert Olsson 3402373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head) 3412373ce1cSRobert Olsson { 3422373ce1cSRobert Olsson struct tnode *tn = container_of(head, struct tnode, rcu); 3432373ce1cSRobert Olsson unsigned int size = sizeof(struct tnode) + 3442373ce1cSRobert Olsson (1 << tn->bits) * sizeof(struct node *); 3452373ce1cSRobert Olsson 3462373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3472373ce1cSRobert Olsson kfree(tn); 3482373ce1cSRobert Olsson else 3492373ce1cSRobert Olsson free_pages((unsigned long)tn, get_order(size)); 3502373ce1cSRobert Olsson } 3512373ce1cSRobert Olsson 3522373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn) 3532373ce1cSRobert Olsson { 354550e29bcSRobert Olsson if(IS_LEAF(tn)) { 355550e29bcSRobert Olsson struct leaf *l = (struct leaf *) tn; 356550e29bcSRobert Olsson call_rcu_bh(&l->rcu, __leaf_free_rcu); 357550e29bcSRobert Olsson } 358550e29bcSRobert Olsson else 3592373ce1cSRobert Olsson call_rcu(&tn->rcu, __tnode_free_rcu); 3602373ce1cSRobert Olsson } 3612373ce1cSRobert Olsson 36219baf839SRobert Olsson static struct leaf *leaf_new(void) 36319baf839SRobert Olsson { 36419baf839SRobert Olsson struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 36519baf839SRobert Olsson if (l) { 3662373ce1cSRobert Olsson l->parent = T_LEAF; 36719baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 36819baf839SRobert Olsson } 36919baf839SRobert Olsson return l; 37019baf839SRobert Olsson } 37119baf839SRobert Olsson 37219baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 37319baf839SRobert Olsson { 37419baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 3752373ce1cSRobert Olsson if (li) { 37619baf839SRobert Olsson li->plen = plen; 37719baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 3782373ce1cSRobert Olsson } 37919baf839SRobert Olsson return li; 38019baf839SRobert Olsson } 38119baf839SRobert Olsson 38219baf839SRobert Olsson static struct tnode* tnode_new(t_key key, int pos, int bits) 38319baf839SRobert Olsson { 38419baf839SRobert Olsson int nchildren = 1<<bits; 38519baf839SRobert Olsson int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 386f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 38719baf839SRobert Olsson 38819baf839SRobert Olsson if (tn) { 38919baf839SRobert Olsson memset(tn, 0, sz); 3902373ce1cSRobert Olsson tn->parent = T_TNODE; 39119baf839SRobert Olsson tn->pos = pos; 39219baf839SRobert Olsson tn->bits = bits; 39319baf839SRobert Olsson tn->key = key; 39419baf839SRobert Olsson tn->full_children = 0; 39519baf839SRobert Olsson tn->empty_children = 1<<bits; 39619baf839SRobert Olsson } 397c877efb2SStephen Hemminger 3980c7770c7SStephen Hemminger pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), 39919baf839SRobert Olsson (unsigned int) (sizeof(struct node) * 1<<bits)); 40019baf839SRobert Olsson return tn; 40119baf839SRobert Olsson } 40219baf839SRobert Olsson 40319baf839SRobert Olsson /* 40419baf839SRobert Olsson * Check whether a tnode 'n' is "full", i.e. it is an internal node 40519baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 40619baf839SRobert Olsson */ 40719baf839SRobert Olsson 408bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n) 40919baf839SRobert Olsson { 41019baf839SRobert Olsson if (n == NULL || IS_LEAF(n)) 41119baf839SRobert Olsson return 0; 41219baf839SRobert Olsson 41319baf839SRobert Olsson return ((struct tnode *) n)->pos == tn->pos + tn->bits; 41419baf839SRobert Olsson } 41519baf839SRobert Olsson 41619baf839SRobert Olsson static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 41719baf839SRobert Olsson { 41819baf839SRobert Olsson tnode_put_child_reorg(tn, i, n, -1); 41919baf839SRobert Olsson } 42019baf839SRobert Olsson 42119baf839SRobert Olsson /* 42219baf839SRobert Olsson * Add a child at position i overwriting the old value. 42319baf839SRobert Olsson * Update the value of full_children and empty_children. 42419baf839SRobert Olsson */ 42519baf839SRobert Olsson 42619baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 42719baf839SRobert Olsson { 4282373ce1cSRobert Olsson struct node *chi = tn->child[i]; 42919baf839SRobert Olsson int isfull; 43019baf839SRobert Olsson 4310c7770c7SStephen Hemminger BUG_ON(i >= 1<<tn->bits); 4320c7770c7SStephen Hemminger 43319baf839SRobert Olsson 43419baf839SRobert Olsson /* update emptyChildren */ 43519baf839SRobert Olsson if (n == NULL && chi != NULL) 43619baf839SRobert Olsson tn->empty_children++; 43719baf839SRobert Olsson else if (n != NULL && chi == NULL) 43819baf839SRobert Olsson tn->empty_children--; 43919baf839SRobert Olsson 44019baf839SRobert Olsson /* update fullChildren */ 44119baf839SRobert Olsson if (wasfull == -1) 44219baf839SRobert Olsson wasfull = tnode_full(tn, chi); 44319baf839SRobert Olsson 44419baf839SRobert Olsson isfull = tnode_full(tn, n); 44519baf839SRobert Olsson if (wasfull && !isfull) 44619baf839SRobert Olsson tn->full_children--; 44719baf839SRobert Olsson else if (!wasfull && isfull) 44819baf839SRobert Olsson tn->full_children++; 44991b9a277SOlof Johansson 45019baf839SRobert Olsson if (n) 45119baf839SRobert Olsson NODE_SET_PARENT(n, tn); 45219baf839SRobert Olsson 4532373ce1cSRobert Olsson rcu_assign_pointer(tn->child[i], n); 45419baf839SRobert Olsson } 45519baf839SRobert Olsson 45619baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn) 45719baf839SRobert Olsson { 45819baf839SRobert Olsson int i; 4592f36895aSRobert Olsson int err = 0; 4602f80b3c8SRobert Olsson struct tnode *old_tn; 461e6308be8SRobert Olsson int inflate_threshold_use; 462e6308be8SRobert Olsson int halve_threshold_use; 46319baf839SRobert Olsson 46419baf839SRobert Olsson if (!tn) 46519baf839SRobert Olsson return NULL; 46619baf839SRobert Olsson 4670c7770c7SStephen Hemminger pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 46819baf839SRobert Olsson tn, inflate_threshold, halve_threshold); 46919baf839SRobert Olsson 47019baf839SRobert Olsson /* No children */ 47119baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn)) { 47219baf839SRobert Olsson tnode_free(tn); 47319baf839SRobert Olsson return NULL; 47419baf839SRobert Olsson } 47519baf839SRobert Olsson /* One child */ 47619baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 47719baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 47891b9a277SOlof Johansson struct node *n; 47919baf839SRobert Olsson 48091b9a277SOlof Johansson n = tn->child[i]; 4812373ce1cSRobert Olsson if (!n) 48291b9a277SOlof Johansson continue; 48319baf839SRobert Olsson 48419baf839SRobert Olsson /* compress one level */ 4852373ce1cSRobert Olsson NODE_SET_PARENT(n, NULL); 48619baf839SRobert Olsson tnode_free(tn); 48719baf839SRobert Olsson return n; 48819baf839SRobert Olsson } 48919baf839SRobert Olsson /* 49019baf839SRobert Olsson * Double as long as the resulting node has a number of 49119baf839SRobert Olsson * nonempty nodes that are above the threshold. 49219baf839SRobert Olsson */ 49319baf839SRobert Olsson 49419baf839SRobert Olsson /* 49519baf839SRobert Olsson * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 49619baf839SRobert Olsson * the Helsinki University of Technology and Matti Tikkanen of Nokia 49719baf839SRobert Olsson * Telecommunications, page 6: 49819baf839SRobert Olsson * "A node is doubled if the ratio of non-empty children to all 49919baf839SRobert Olsson * children in the *doubled* node is at least 'high'." 50019baf839SRobert Olsson * 50119baf839SRobert Olsson * 'high' in this instance is the variable 'inflate_threshold'. It 50219baf839SRobert Olsson * is expressed as a percentage, so we multiply it with 50319baf839SRobert Olsson * tnode_child_length() and instead of multiplying by 2 (since the 50419baf839SRobert Olsson * child array will be doubled by inflate()) and multiplying 50519baf839SRobert Olsson * the left-hand side by 100 (to handle the percentage thing) we 50619baf839SRobert Olsson * multiply the left-hand side by 50. 50719baf839SRobert Olsson * 50819baf839SRobert Olsson * The left-hand side may look a bit weird: tnode_child_length(tn) 50919baf839SRobert Olsson * - tn->empty_children is of course the number of non-null children 51019baf839SRobert Olsson * in the current node. tn->full_children is the number of "full" 51119baf839SRobert Olsson * children, that is non-null tnodes with a skip value of 0. 51219baf839SRobert Olsson * All of those will be doubled in the resulting inflated tnode, so 51319baf839SRobert Olsson * we just count them one extra time here. 51419baf839SRobert Olsson * 51519baf839SRobert Olsson * A clearer way to write this would be: 51619baf839SRobert Olsson * 51719baf839SRobert Olsson * to_be_doubled = tn->full_children; 51819baf839SRobert Olsson * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 51919baf839SRobert Olsson * tn->full_children; 52019baf839SRobert Olsson * 52119baf839SRobert Olsson * new_child_length = tnode_child_length(tn) * 2; 52219baf839SRobert Olsson * 52319baf839SRobert Olsson * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 52419baf839SRobert Olsson * new_child_length; 52519baf839SRobert Olsson * if (new_fill_factor >= inflate_threshold) 52619baf839SRobert Olsson * 52719baf839SRobert Olsson * ...and so on, tho it would mess up the while () loop. 52819baf839SRobert Olsson * 52919baf839SRobert Olsson * anyway, 53019baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 53119baf839SRobert Olsson * inflate_threshold 53219baf839SRobert Olsson * 53319baf839SRobert Olsson * avoid a division: 53419baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 53519baf839SRobert Olsson * inflate_threshold * new_child_length 53619baf839SRobert Olsson * 53719baf839SRobert Olsson * expand not_to_be_doubled and to_be_doubled, and shorten: 53819baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 53919baf839SRobert Olsson * tn->full_children) >= inflate_threshold * new_child_length 54019baf839SRobert Olsson * 54119baf839SRobert Olsson * expand new_child_length: 54219baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 54319baf839SRobert Olsson * tn->full_children) >= 54419baf839SRobert Olsson * inflate_threshold * tnode_child_length(tn) * 2 54519baf839SRobert Olsson * 54619baf839SRobert Olsson * shorten again: 54719baf839SRobert Olsson * 50 * (tn->full_children + tnode_child_length(tn) - 54819baf839SRobert Olsson * tn->empty_children) >= inflate_threshold * 54919baf839SRobert Olsson * tnode_child_length(tn) 55019baf839SRobert Olsson * 55119baf839SRobert Olsson */ 55219baf839SRobert Olsson 55319baf839SRobert Olsson check_tnode(tn); 55419baf839SRobert Olsson 555e6308be8SRobert Olsson /* Keep root node larger */ 556e6308be8SRobert Olsson 557e6308be8SRobert Olsson if(!tn->parent) 558e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold_root; 559e6308be8SRobert Olsson else 560e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold; 561e6308be8SRobert Olsson 5622f36895aSRobert Olsson err = 0; 56319baf839SRobert Olsson while ((tn->full_children > 0 && 56419baf839SRobert Olsson 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 565e6308be8SRobert Olsson inflate_threshold_use * tnode_child_length(tn))) { 56619baf839SRobert Olsson 5672f80b3c8SRobert Olsson old_tn = tn; 5682f80b3c8SRobert Olsson tn = inflate(t, tn); 5692f80b3c8SRobert Olsson if (IS_ERR(tn)) { 5702f80b3c8SRobert Olsson tn = old_tn; 5712f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 5722f36895aSRobert Olsson t->stats.resize_node_skipped++; 5732f36895aSRobert Olsson #endif 5742f36895aSRobert Olsson break; 5752f36895aSRobert Olsson } 57619baf839SRobert Olsson } 57719baf839SRobert Olsson 57819baf839SRobert Olsson check_tnode(tn); 57919baf839SRobert Olsson 58019baf839SRobert Olsson /* 58119baf839SRobert Olsson * Halve as long as the number of empty children in this 58219baf839SRobert Olsson * node is above threshold. 58319baf839SRobert Olsson */ 5842f36895aSRobert Olsson 585e6308be8SRobert Olsson 586e6308be8SRobert Olsson /* Keep root node larger */ 587e6308be8SRobert Olsson 588e6308be8SRobert Olsson if(!tn->parent) 589e6308be8SRobert Olsson halve_threshold_use = halve_threshold_root; 590e6308be8SRobert Olsson else 591e6308be8SRobert Olsson halve_threshold_use = halve_threshold; 592e6308be8SRobert Olsson 5932f36895aSRobert Olsson err = 0; 59419baf839SRobert Olsson while (tn->bits > 1 && 59519baf839SRobert Olsson 100 * (tnode_child_length(tn) - tn->empty_children) < 596e6308be8SRobert Olsson halve_threshold_use * tnode_child_length(tn)) { 59719baf839SRobert Olsson 5982f80b3c8SRobert Olsson old_tn = tn; 5992f80b3c8SRobert Olsson tn = halve(t, tn); 6002f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6012f80b3c8SRobert Olsson tn = old_tn; 6022f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6032f36895aSRobert Olsson t->stats.resize_node_skipped++; 6042f36895aSRobert Olsson #endif 6052f36895aSRobert Olsson break; 6062f36895aSRobert Olsson } 6072f36895aSRobert Olsson } 6082f36895aSRobert Olsson 60919baf839SRobert Olsson 61019baf839SRobert Olsson /* Only one child remains */ 61119baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 61219baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 61391b9a277SOlof Johansson struct node *n; 61419baf839SRobert Olsson 61591b9a277SOlof Johansson n = tn->child[i]; 6162373ce1cSRobert Olsson if (!n) 61791b9a277SOlof Johansson continue; 61891b9a277SOlof Johansson 61991b9a277SOlof Johansson /* compress one level */ 62091b9a277SOlof Johansson 6212373ce1cSRobert Olsson NODE_SET_PARENT(n, NULL); 62219baf839SRobert Olsson tnode_free(tn); 62319baf839SRobert Olsson return n; 62419baf839SRobert Olsson } 62519baf839SRobert Olsson 62619baf839SRobert Olsson return (struct node *) tn; 62719baf839SRobert Olsson } 62819baf839SRobert Olsson 6292f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn) 63019baf839SRobert Olsson { 63119baf839SRobert Olsson struct tnode *inode; 63219baf839SRobert Olsson struct tnode *oldtnode = tn; 63319baf839SRobert Olsson int olen = tnode_child_length(tn); 63419baf839SRobert Olsson int i; 63519baf839SRobert Olsson 6360c7770c7SStephen Hemminger pr_debug("In inflate\n"); 63719baf839SRobert Olsson 63819baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 63919baf839SRobert Olsson 6402f80b3c8SRobert Olsson if (!tn) 6412f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 6422f36895aSRobert Olsson 6432f36895aSRobert Olsson /* 6442f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 6452f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 6462f36895aSRobert Olsson * fails. In case of failure we return the oldnode and inflate 6472f36895aSRobert Olsson * of tnode is ignored. 6482f36895aSRobert Olsson */ 6492f36895aSRobert Olsson 6502f36895aSRobert Olsson for (i = 0; i < olen; i++) { 6512f36895aSRobert Olsson struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); 6522f36895aSRobert Olsson 6532f36895aSRobert Olsson if (inode && 6542f36895aSRobert Olsson IS_TNODE(inode) && 6552f36895aSRobert Olsson inode->pos == oldtnode->pos + oldtnode->bits && 6562f36895aSRobert Olsson inode->bits > 1) { 6572f36895aSRobert Olsson struct tnode *left, *right; 6582f36895aSRobert Olsson t_key m = TKEY_GET_MASK(inode->pos, 1); 6592f36895aSRobert Olsson 6602f36895aSRobert Olsson left = tnode_new(inode->key&(~m), inode->pos + 1, 6612f36895aSRobert Olsson inode->bits - 1); 6622f80b3c8SRobert Olsson if (!left) 6632f80b3c8SRobert Olsson goto nomem; 6642f36895aSRobert Olsson 6652f36895aSRobert Olsson right = tnode_new(inode->key|m, inode->pos + 1, 6662f36895aSRobert Olsson inode->bits - 1); 6672f36895aSRobert Olsson 6682f36895aSRobert Olsson if (!right) { 6692f80b3c8SRobert Olsson tnode_free(left); 6702f80b3c8SRobert Olsson goto nomem; 6712f36895aSRobert Olsson } 6722f36895aSRobert Olsson 6732f36895aSRobert Olsson put_child(t, tn, 2*i, (struct node *) left); 6742f36895aSRobert Olsson put_child(t, tn, 2*i+1, (struct node *) right); 6752f36895aSRobert Olsson } 6762f36895aSRobert Olsson } 6772f36895aSRobert Olsson 67819baf839SRobert Olsson for (i = 0; i < olen; i++) { 67919baf839SRobert Olsson struct node *node = tnode_get_child(oldtnode, i); 68091b9a277SOlof Johansson struct tnode *left, *right; 68191b9a277SOlof Johansson int size, j; 68219baf839SRobert Olsson 68319baf839SRobert Olsson /* An empty child */ 68419baf839SRobert Olsson if (node == NULL) 68519baf839SRobert Olsson continue; 68619baf839SRobert Olsson 68719baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 68819baf839SRobert Olsson 68919baf839SRobert Olsson if (IS_LEAF(node) || ((struct tnode *) node)->pos > 69019baf839SRobert Olsson tn->pos + tn->bits - 1) { 6912f36895aSRobert Olsson if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 69219baf839SRobert Olsson 1) == 0) 69319baf839SRobert Olsson put_child(t, tn, 2*i, node); 69419baf839SRobert Olsson else 69519baf839SRobert Olsson put_child(t, tn, 2*i+1, node); 69619baf839SRobert Olsson continue; 69719baf839SRobert Olsson } 69819baf839SRobert Olsson 69919baf839SRobert Olsson /* An internal node with two children */ 70019baf839SRobert Olsson inode = (struct tnode *) node; 70119baf839SRobert Olsson 70219baf839SRobert Olsson if (inode->bits == 1) { 70319baf839SRobert Olsson put_child(t, tn, 2*i, inode->child[0]); 70419baf839SRobert Olsson put_child(t, tn, 2*i+1, inode->child[1]); 70519baf839SRobert Olsson 70619baf839SRobert Olsson tnode_free(inode); 70791b9a277SOlof Johansson continue; 70819baf839SRobert Olsson } 70919baf839SRobert Olsson 71019baf839SRobert Olsson /* An internal node with more than two children */ 71119baf839SRobert Olsson 71219baf839SRobert Olsson /* We will replace this node 'inode' with two new 71319baf839SRobert Olsson * ones, 'left' and 'right', each with half of the 71419baf839SRobert Olsson * original children. The two new nodes will have 71519baf839SRobert Olsson * a position one bit further down the key and this 71619baf839SRobert Olsson * means that the "significant" part of their keys 71719baf839SRobert Olsson * (see the discussion near the top of this file) 71819baf839SRobert Olsson * will differ by one bit, which will be "0" in 71919baf839SRobert Olsson * left's key and "1" in right's key. Since we are 72019baf839SRobert Olsson * moving the key position by one step, the bit that 72119baf839SRobert Olsson * we are moving away from - the bit at position 72219baf839SRobert Olsson * (inode->pos) - is the one that will differ between 72319baf839SRobert Olsson * left and right. So... we synthesize that bit in the 72419baf839SRobert Olsson * two new keys. 72519baf839SRobert Olsson * The mask 'm' below will be a single "one" bit at 72619baf839SRobert Olsson * the position (inode->pos) 72719baf839SRobert Olsson */ 72819baf839SRobert Olsson 72919baf839SRobert Olsson /* Use the old key, but set the new significant 73019baf839SRobert Olsson * bit to zero. 73119baf839SRobert Olsson */ 7322f36895aSRobert Olsson 7332f36895aSRobert Olsson left = (struct tnode *) tnode_get_child(tn, 2*i); 7342f36895aSRobert Olsson put_child(t, tn, 2*i, NULL); 73519baf839SRobert Olsson 73691b9a277SOlof Johansson BUG_ON(!left); 73719baf839SRobert Olsson 7382f36895aSRobert Olsson right = (struct tnode *) tnode_get_child(tn, 2*i+1); 7392f36895aSRobert Olsson put_child(t, tn, 2*i+1, NULL); 74019baf839SRobert Olsson 74191b9a277SOlof Johansson BUG_ON(!right); 74219baf839SRobert Olsson 74319baf839SRobert Olsson size = tnode_child_length(left); 74419baf839SRobert Olsson for (j = 0; j < size; j++) { 74519baf839SRobert Olsson put_child(t, left, j, inode->child[j]); 74619baf839SRobert Olsson put_child(t, right, j, inode->child[j + size]); 74719baf839SRobert Olsson } 74819baf839SRobert Olsson put_child(t, tn, 2*i, resize(t, left)); 74919baf839SRobert Olsson put_child(t, tn, 2*i+1, resize(t, right)); 75019baf839SRobert Olsson 75119baf839SRobert Olsson tnode_free(inode); 75219baf839SRobert Olsson } 75319baf839SRobert Olsson tnode_free(oldtnode); 75419baf839SRobert Olsson return tn; 7552f80b3c8SRobert Olsson nomem: 7562f80b3c8SRobert Olsson { 7572f80b3c8SRobert Olsson int size = tnode_child_length(tn); 7582f80b3c8SRobert Olsson int j; 7592f80b3c8SRobert Olsson 7602f80b3c8SRobert Olsson for (j = 0; j < size; j++) 7612f80b3c8SRobert Olsson if (tn->child[j]) 7622f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 7632f80b3c8SRobert Olsson 7642f80b3c8SRobert Olsson tnode_free(tn); 7652f80b3c8SRobert Olsson 7662f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 7672f80b3c8SRobert Olsson } 76819baf839SRobert Olsson } 76919baf839SRobert Olsson 7702f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn) 77119baf839SRobert Olsson { 77219baf839SRobert Olsson struct tnode *oldtnode = tn; 77319baf839SRobert Olsson struct node *left, *right; 77419baf839SRobert Olsson int i; 77519baf839SRobert Olsson int olen = tnode_child_length(tn); 77619baf839SRobert Olsson 7770c7770c7SStephen Hemminger pr_debug("In halve\n"); 77819baf839SRobert Olsson 77919baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 78019baf839SRobert Olsson 7812f80b3c8SRobert Olsson if (!tn) 7822f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 7832f36895aSRobert Olsson 7842f36895aSRobert Olsson /* 7852f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 7862f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 7872f36895aSRobert Olsson * fails. In case of failure we return the oldnode and halve 7882f36895aSRobert Olsson * of tnode is ignored. 7892f36895aSRobert Olsson */ 7902f36895aSRobert Olsson 7912f36895aSRobert Olsson for (i = 0; i < olen; i += 2) { 7922f36895aSRobert Olsson left = tnode_get_child(oldtnode, i); 7932f36895aSRobert Olsson right = tnode_get_child(oldtnode, i+1); 7942f36895aSRobert Olsson 7952f36895aSRobert Olsson /* Two nonempty children */ 7962f36895aSRobert Olsson if (left && right) { 7972f80b3c8SRobert Olsson struct tnode *newn; 7982f36895aSRobert Olsson 7992f80b3c8SRobert Olsson newn = tnode_new(left->key, tn->pos + tn->bits, 1); 8002f80b3c8SRobert Olsson 8012f80b3c8SRobert Olsson if (!newn) 8022f80b3c8SRobert Olsson goto nomem; 8032f80b3c8SRobert Olsson 8042f80b3c8SRobert Olsson put_child(t, tn, i/2, (struct node *)newn); 8052f36895aSRobert Olsson } 8062f36895aSRobert Olsson 8072f36895aSRobert Olsson } 80819baf839SRobert Olsson 80919baf839SRobert Olsson for (i = 0; i < olen; i += 2) { 81091b9a277SOlof Johansson struct tnode *newBinNode; 81191b9a277SOlof Johansson 81219baf839SRobert Olsson left = tnode_get_child(oldtnode, i); 81319baf839SRobert Olsson right = tnode_get_child(oldtnode, i+1); 81419baf839SRobert Olsson 81519baf839SRobert Olsson /* At least one of the children is empty */ 81619baf839SRobert Olsson if (left == NULL) { 81719baf839SRobert Olsson if (right == NULL) /* Both are empty */ 81819baf839SRobert Olsson continue; 81919baf839SRobert Olsson put_child(t, tn, i/2, right); 82091b9a277SOlof Johansson continue; 82191b9a277SOlof Johansson } 82291b9a277SOlof Johansson 82391b9a277SOlof Johansson if (right == NULL) { 82419baf839SRobert Olsson put_child(t, tn, i/2, left); 82591b9a277SOlof Johansson continue; 82691b9a277SOlof Johansson } 82719baf839SRobert Olsson 82819baf839SRobert Olsson /* Two nonempty children */ 82991b9a277SOlof Johansson newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 8302f36895aSRobert Olsson put_child(t, tn, i/2, NULL); 83119baf839SRobert Olsson put_child(t, newBinNode, 0, left); 83219baf839SRobert Olsson put_child(t, newBinNode, 1, right); 83319baf839SRobert Olsson put_child(t, tn, i/2, resize(t, newBinNode)); 83419baf839SRobert Olsson } 83519baf839SRobert Olsson tnode_free(oldtnode); 83619baf839SRobert Olsson return tn; 8372f80b3c8SRobert Olsson nomem: 8382f80b3c8SRobert Olsson { 8392f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8402f80b3c8SRobert Olsson int j; 8412f80b3c8SRobert Olsson 8422f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8432f80b3c8SRobert Olsson if (tn->child[j]) 8442f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8452f80b3c8SRobert Olsson 8462f80b3c8SRobert Olsson tnode_free(tn); 8472f80b3c8SRobert Olsson 8482f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8492f80b3c8SRobert Olsson } 85019baf839SRobert Olsson } 85119baf839SRobert Olsson 85291b9a277SOlof Johansson static void trie_init(struct trie *t) 85319baf839SRobert Olsson { 85491b9a277SOlof Johansson if (!t) 85591b9a277SOlof Johansson return; 85691b9a277SOlof Johansson 85719baf839SRobert Olsson t->size = 0; 8582373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 85919baf839SRobert Olsson t->revision = 0; 86019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 86119baf839SRobert Olsson memset(&t->stats, 0, sizeof(struct trie_use_stats)); 86219baf839SRobert Olsson #endif 86319baf839SRobert Olsson } 86419baf839SRobert Olsson 865772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 8662373ce1cSRobert Olsson via get_fa_head and dump */ 8672373ce1cSRobert Olsson 868772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen) 86919baf839SRobert Olsson { 870772cb712SRobert Olsson struct hlist_head *head = &l->list; 87119baf839SRobert Olsson struct hlist_node *node; 87219baf839SRobert Olsson struct leaf_info *li; 87319baf839SRobert Olsson 8742373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, head, hlist) 87519baf839SRobert Olsson if (li->plen == plen) 87619baf839SRobert Olsson return li; 87791b9a277SOlof Johansson 87819baf839SRobert Olsson return NULL; 87919baf839SRobert Olsson } 88019baf839SRobert Olsson 88119baf839SRobert Olsson static inline struct list_head * get_fa_head(struct leaf *l, int plen) 88219baf839SRobert Olsson { 883772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 88419baf839SRobert Olsson 88591b9a277SOlof Johansson if (!li) 88691b9a277SOlof Johansson return NULL; 88719baf839SRobert Olsson 88891b9a277SOlof Johansson return &li->falh; 88919baf839SRobert Olsson } 89019baf839SRobert Olsson 89119baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 89219baf839SRobert Olsson { 89319baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 89491b9a277SOlof Johansson struct hlist_node *node; 89519baf839SRobert Olsson 89691b9a277SOlof Johansson if (hlist_empty(head)) { 8972373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 89891b9a277SOlof Johansson } else { 89991b9a277SOlof Johansson hlist_for_each_entry(li, node, head, hlist) { 90019baf839SRobert Olsson if (new->plen > li->plen) 90119baf839SRobert Olsson break; 90219baf839SRobert Olsson 90319baf839SRobert Olsson last = li; 90419baf839SRobert Olsson } 90519baf839SRobert Olsson if (last) 9062373ce1cSRobert Olsson hlist_add_after_rcu(&last->hlist, &new->hlist); 90719baf839SRobert Olsson else 9082373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 90919baf839SRobert Olsson } 91019baf839SRobert Olsson } 91119baf839SRobert Olsson 9122373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 9132373ce1cSRobert Olsson 91419baf839SRobert Olsson static struct leaf * 91519baf839SRobert Olsson fib_find_node(struct trie *t, u32 key) 91619baf839SRobert Olsson { 91719baf839SRobert Olsson int pos; 91819baf839SRobert Olsson struct tnode *tn; 91919baf839SRobert Olsson struct node *n; 92019baf839SRobert Olsson 92119baf839SRobert Olsson pos = 0; 9222373ce1cSRobert Olsson n = rcu_dereference(t->trie); 92319baf839SRobert Olsson 92419baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 92519baf839SRobert Olsson tn = (struct tnode *) n; 92619baf839SRobert Olsson 92719baf839SRobert Olsson check_tnode(tn); 92819baf839SRobert Olsson 92919baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 93019baf839SRobert Olsson pos = tn->pos + tn->bits; 93119baf839SRobert Olsson n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 93291b9a277SOlof Johansson } else 93319baf839SRobert Olsson break; 93419baf839SRobert Olsson } 93519baf839SRobert Olsson /* Case we have found a leaf. Compare prefixes */ 93619baf839SRobert Olsson 93791b9a277SOlof Johansson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) 93891b9a277SOlof Johansson return (struct leaf *)n; 93991b9a277SOlof Johansson 94019baf839SRobert Olsson return NULL; 94119baf839SRobert Olsson } 94219baf839SRobert Olsson 94319baf839SRobert Olsson static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 94419baf839SRobert Olsson { 94519baf839SRobert Olsson int wasfull; 94619baf839SRobert Olsson t_key cindex, key; 94719baf839SRobert Olsson struct tnode *tp = NULL; 94819baf839SRobert Olsson 94919baf839SRobert Olsson key = tn->key; 95019baf839SRobert Olsson 95119baf839SRobert Olsson while (tn != NULL && NODE_PARENT(tn) != NULL) { 95219baf839SRobert Olsson 95319baf839SRobert Olsson tp = NODE_PARENT(tn); 95419baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 95519baf839SRobert Olsson wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 95619baf839SRobert Olsson tn = (struct tnode *) resize (t, (struct tnode *)tn); 95719baf839SRobert Olsson tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 95819baf839SRobert Olsson 95919baf839SRobert Olsson if (!NODE_PARENT(tn)) 96019baf839SRobert Olsson break; 96119baf839SRobert Olsson 96219baf839SRobert Olsson tn = NODE_PARENT(tn); 96319baf839SRobert Olsson } 96419baf839SRobert Olsson /* Handle last (top) tnode */ 96519baf839SRobert Olsson if (IS_TNODE(tn)) 96619baf839SRobert Olsson tn = (struct tnode*) resize(t, (struct tnode *)tn); 96719baf839SRobert Olsson 96819baf839SRobert Olsson return (struct node*) tn; 96919baf839SRobert Olsson } 97019baf839SRobert Olsson 9712373ce1cSRobert Olsson /* only used from updater-side */ 9722373ce1cSRobert Olsson 97319baf839SRobert Olsson static struct list_head * 974f835e471SRobert Olsson fib_insert_node(struct trie *t, int *err, u32 key, int plen) 97519baf839SRobert Olsson { 97619baf839SRobert Olsson int pos, newpos; 97719baf839SRobert Olsson struct tnode *tp = NULL, *tn = NULL; 97819baf839SRobert Olsson struct node *n; 97919baf839SRobert Olsson struct leaf *l; 98019baf839SRobert Olsson int missbit; 98119baf839SRobert Olsson struct list_head *fa_head = NULL; 98219baf839SRobert Olsson struct leaf_info *li; 98319baf839SRobert Olsson t_key cindex; 98419baf839SRobert Olsson 98519baf839SRobert Olsson pos = 0; 98619baf839SRobert Olsson n = t->trie; 98719baf839SRobert Olsson 98819baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 98919baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 99019baf839SRobert Olsson * and we should just put our new leaf in that. 99119baf839SRobert Olsson * If we point to a T_TNODE, check if it matches our key. Note that 99219baf839SRobert Olsson * a T_TNODE might be skipping any number of bits - its 'pos' need 99319baf839SRobert Olsson * not be the parent's 'pos'+'bits'! 99419baf839SRobert Olsson * 99519baf839SRobert Olsson * If it does match the current key, get pos/bits from it, extract 99619baf839SRobert Olsson * the index from our key, push the T_TNODE and walk the tree. 99719baf839SRobert Olsson * 99819baf839SRobert Olsson * If it doesn't, we have to replace it with a new T_TNODE. 99919baf839SRobert Olsson * 100019baf839SRobert Olsson * If we point to a T_LEAF, it might or might not have the same key 100119baf839SRobert Olsson * as we do. If it does, just change the value, update the T_LEAF's 100219baf839SRobert Olsson * value, and return it. 100319baf839SRobert Olsson * If it doesn't, we need to replace it with a T_TNODE. 100419baf839SRobert Olsson */ 100519baf839SRobert Olsson 100619baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 100719baf839SRobert Olsson tn = (struct tnode *) n; 100819baf839SRobert Olsson 100919baf839SRobert Olsson check_tnode(tn); 101019baf839SRobert Olsson 101119baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 101219baf839SRobert Olsson tp = tn; 101319baf839SRobert Olsson pos = tn->pos + tn->bits; 101419baf839SRobert Olsson n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 101519baf839SRobert Olsson 10160c7770c7SStephen Hemminger BUG_ON(n && NODE_PARENT(n) != tn); 101791b9a277SOlof Johansson } else 101819baf839SRobert Olsson break; 101919baf839SRobert Olsson } 102019baf839SRobert Olsson 102119baf839SRobert Olsson /* 102219baf839SRobert Olsson * n ----> NULL, LEAF or TNODE 102319baf839SRobert Olsson * 102419baf839SRobert Olsson * tp is n's (parent) ----> NULL or TNODE 102519baf839SRobert Olsson */ 102619baf839SRobert Olsson 102791b9a277SOlof Johansson BUG_ON(tp && IS_LEAF(tp)); 102819baf839SRobert Olsson 102919baf839SRobert Olsson /* Case 1: n is a leaf. Compare prefixes */ 103019baf839SRobert Olsson 103119baf839SRobert Olsson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 103219baf839SRobert Olsson struct leaf *l = (struct leaf *) n; 103319baf839SRobert Olsson 103419baf839SRobert Olsson li = leaf_info_new(plen); 103519baf839SRobert Olsson 1036f835e471SRobert Olsson if (!li) { 1037f835e471SRobert Olsson *err = -ENOMEM; 1038f835e471SRobert Olsson goto err; 1039f835e471SRobert Olsson } 104019baf839SRobert Olsson 104119baf839SRobert Olsson fa_head = &li->falh; 104219baf839SRobert Olsson insert_leaf_info(&l->list, li); 104319baf839SRobert Olsson goto done; 104419baf839SRobert Olsson } 104519baf839SRobert Olsson t->size++; 104619baf839SRobert Olsson l = leaf_new(); 104719baf839SRobert Olsson 1048f835e471SRobert Olsson if (!l) { 1049f835e471SRobert Olsson *err = -ENOMEM; 1050f835e471SRobert Olsson goto err; 1051f835e471SRobert Olsson } 105219baf839SRobert Olsson 105319baf839SRobert Olsson l->key = key; 105419baf839SRobert Olsson li = leaf_info_new(plen); 105519baf839SRobert Olsson 1056f835e471SRobert Olsson if (!li) { 1057f835e471SRobert Olsson tnode_free((struct tnode *) l); 1058f835e471SRobert Olsson *err = -ENOMEM; 1059f835e471SRobert Olsson goto err; 1060f835e471SRobert Olsson } 106119baf839SRobert Olsson 106219baf839SRobert Olsson fa_head = &li->falh; 106319baf839SRobert Olsson insert_leaf_info(&l->list, li); 106419baf839SRobert Olsson 106519baf839SRobert Olsson if (t->trie && n == NULL) { 106691b9a277SOlof Johansson /* Case 2: n is NULL, and will just insert a new leaf */ 106719baf839SRobert Olsson 106819baf839SRobert Olsson NODE_SET_PARENT(l, tp); 106919baf839SRobert Olsson 107019baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 107119baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 107291b9a277SOlof Johansson } else { 107319baf839SRobert Olsson /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 107419baf839SRobert Olsson /* 107519baf839SRobert Olsson * Add a new tnode here 107619baf839SRobert Olsson * first tnode need some special handling 107719baf839SRobert Olsson */ 107819baf839SRobert Olsson 107919baf839SRobert Olsson if (tp) 108019baf839SRobert Olsson pos = tp->pos+tp->bits; 108119baf839SRobert Olsson else 108219baf839SRobert Olsson pos = 0; 108391b9a277SOlof Johansson 108419baf839SRobert Olsson if (n) { 108519baf839SRobert Olsson newpos = tkey_mismatch(key, pos, n->key); 108619baf839SRobert Olsson tn = tnode_new(n->key, newpos, 1); 108791b9a277SOlof Johansson } else { 108819baf839SRobert Olsson newpos = 0; 108919baf839SRobert Olsson tn = tnode_new(key, newpos, 1); /* First tnode */ 109019baf839SRobert Olsson } 1091f835e471SRobert Olsson 1092f835e471SRobert Olsson if (!tn) { 1093f835e471SRobert Olsson free_leaf_info(li); 1094f835e471SRobert Olsson tnode_free((struct tnode *) l); 1095f835e471SRobert Olsson *err = -ENOMEM; 1096f835e471SRobert Olsson goto err; 1097f835e471SRobert Olsson } 109819baf839SRobert Olsson 109919baf839SRobert Olsson NODE_SET_PARENT(tn, tp); 110019baf839SRobert Olsson 110119baf839SRobert Olsson missbit = tkey_extract_bits(key, newpos, 1); 110219baf839SRobert Olsson put_child(t, tn, missbit, (struct node *)l); 110319baf839SRobert Olsson put_child(t, tn, 1-missbit, n); 110419baf839SRobert Olsson 110519baf839SRobert Olsson if (tp) { 110619baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 110719baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 110891b9a277SOlof Johansson } else { 11092373ce1cSRobert Olsson rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ 111019baf839SRobert Olsson tp = tn; 111119baf839SRobert Olsson } 111219baf839SRobert Olsson } 111391b9a277SOlof Johansson 111491b9a277SOlof Johansson if (tp && tp->pos + tp->bits > 32) 111578c6671aSStephen Hemminger printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 111619baf839SRobert Olsson tp, tp->pos, tp->bits, key, plen); 111791b9a277SOlof Johansson 111819baf839SRobert Olsson /* Rebalance the trie */ 11192373ce1cSRobert Olsson 11202373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1121f835e471SRobert Olsson done: 1122f835e471SRobert Olsson t->revision++; 112391b9a277SOlof Johansson err: 112419baf839SRobert Olsson return fa_head; 112519baf839SRobert Olsson } 112619baf839SRobert Olsson 11274e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) 112819baf839SRobert Olsson { 112919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 113019baf839SRobert Olsson struct fib_alias *fa, *new_fa; 113119baf839SRobert Olsson struct list_head *fa_head = NULL; 113219baf839SRobert Olsson struct fib_info *fi; 11334e902c57SThomas Graf int plen = cfg->fc_dst_len; 11344e902c57SThomas Graf u8 tos = cfg->fc_tos; 113519baf839SRobert Olsson u32 key, mask; 113619baf839SRobert Olsson int err; 113719baf839SRobert Olsson struct leaf *l; 113819baf839SRobert Olsson 113919baf839SRobert Olsson if (plen > 32) 114019baf839SRobert Olsson return -EINVAL; 114119baf839SRobert Olsson 11424e902c57SThomas Graf key = ntohl(cfg->fc_dst); 114319baf839SRobert Olsson 11442dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 114519baf839SRobert Olsson 114619baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 114719baf839SRobert Olsson 114819baf839SRobert Olsson if (key & ~mask) 114919baf839SRobert Olsson return -EINVAL; 115019baf839SRobert Olsson 115119baf839SRobert Olsson key = key & mask; 115219baf839SRobert Olsson 11534e902c57SThomas Graf fi = fib_create_info(cfg); 11544e902c57SThomas Graf if (IS_ERR(fi)) { 11554e902c57SThomas Graf err = PTR_ERR(fi); 115619baf839SRobert Olsson goto err; 11574e902c57SThomas Graf } 115819baf839SRobert Olsson 115919baf839SRobert Olsson l = fib_find_node(t, key); 116019baf839SRobert Olsson fa = NULL; 116119baf839SRobert Olsson 116219baf839SRobert Olsson if (l) { 116319baf839SRobert Olsson fa_head = get_fa_head(l, plen); 116419baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 116519baf839SRobert Olsson } 116619baf839SRobert Olsson 116719baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 116819baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 116919baf839SRobert Olsson * exists or to the node before which we will insert new one. 117019baf839SRobert Olsson * 117119baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 117219baf839SRobert Olsson * insert to the head of f. 117319baf839SRobert Olsson * 117419baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 117519baf839SRobert Olsson * and we need to allocate a new one of those as well. 117619baf839SRobert Olsson */ 117719baf839SRobert Olsson 117891b9a277SOlof Johansson if (fa && fa->fa_info->fib_priority == fi->fib_priority) { 117919baf839SRobert Olsson struct fib_alias *fa_orig; 118019baf839SRobert Olsson 118119baf839SRobert Olsson err = -EEXIST; 11824e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 118319baf839SRobert Olsson goto out; 118419baf839SRobert Olsson 11854e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 118619baf839SRobert Olsson struct fib_info *fi_drop; 118719baf839SRobert Olsson u8 state; 118819baf839SRobert Olsson 11892373ce1cSRobert Olsson err = -ENOBUFS; 1190e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 11912373ce1cSRobert Olsson if (new_fa == NULL) 11922373ce1cSRobert Olsson goto out; 119319baf839SRobert Olsson 119419baf839SRobert Olsson fi_drop = fa->fa_info; 11952373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 11962373ce1cSRobert Olsson new_fa->fa_info = fi; 11974e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 11984e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 119919baf839SRobert Olsson state = fa->fa_state; 12002373ce1cSRobert Olsson new_fa->fa_state &= ~FA_S_ACCESSED; 120119baf839SRobert Olsson 12022373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12032373ce1cSRobert Olsson alias_free_mem_rcu(fa); 120419baf839SRobert Olsson 120519baf839SRobert Olsson fib_release_info(fi_drop); 120619baf839SRobert Olsson if (state & FA_S_ACCESSED) 120719baf839SRobert Olsson rt_cache_flush(-1); 120819baf839SRobert Olsson 120919baf839SRobert Olsson goto succeeded; 121019baf839SRobert Olsson } 121119baf839SRobert Olsson /* Error if we find a perfect match which 121219baf839SRobert Olsson * uses the same scope, type, and nexthop 121319baf839SRobert Olsson * information. 121419baf839SRobert Olsson */ 121519baf839SRobert Olsson fa_orig = fa; 121619baf839SRobert Olsson list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 121719baf839SRobert Olsson if (fa->fa_tos != tos) 121819baf839SRobert Olsson break; 121919baf839SRobert Olsson if (fa->fa_info->fib_priority != fi->fib_priority) 122019baf839SRobert Olsson break; 12214e902c57SThomas Graf if (fa->fa_type == cfg->fc_type && 12224e902c57SThomas Graf fa->fa_scope == cfg->fc_scope && 122319baf839SRobert Olsson fa->fa_info == fi) { 122419baf839SRobert Olsson goto out; 122519baf839SRobert Olsson } 122619baf839SRobert Olsson } 12274e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 122819baf839SRobert Olsson fa = fa_orig; 122919baf839SRobert Olsson } 123019baf839SRobert Olsson err = -ENOENT; 12314e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 123219baf839SRobert Olsson goto out; 123319baf839SRobert Olsson 123419baf839SRobert Olsson err = -ENOBUFS; 1235e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 123619baf839SRobert Olsson if (new_fa == NULL) 123719baf839SRobert Olsson goto out; 123819baf839SRobert Olsson 123919baf839SRobert Olsson new_fa->fa_info = fi; 124019baf839SRobert Olsson new_fa->fa_tos = tos; 12414e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12424e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 124319baf839SRobert Olsson new_fa->fa_state = 0; 124419baf839SRobert Olsson /* 124519baf839SRobert Olsson * Insert new entry to the list. 124619baf839SRobert Olsson */ 124719baf839SRobert Olsson 1248f835e471SRobert Olsson if (!fa_head) { 1249f835e471SRobert Olsson err = 0; 1250b47b2ec1SHerbert Xu fa_head = fib_insert_node(t, &err, key, plen); 1251f835e471SRobert Olsson if (err) 1252f835e471SRobert Olsson goto out_free_new_fa; 1253f835e471SRobert Olsson } 125419baf839SRobert Olsson 12552373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 12562373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 125719baf839SRobert Olsson 125819baf839SRobert Olsson rt_cache_flush(-1); 12594e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 12604e902c57SThomas Graf &cfg->fc_nlinfo); 126119baf839SRobert Olsson succeeded: 126219baf839SRobert Olsson return 0; 1263f835e471SRobert Olsson 1264f835e471SRobert Olsson out_free_new_fa: 1265f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 126619baf839SRobert Olsson out: 126719baf839SRobert Olsson fib_release_info(fi); 126891b9a277SOlof Johansson err: 126919baf839SRobert Olsson return err; 127019baf839SRobert Olsson } 127119baf839SRobert Olsson 12722373ce1cSRobert Olsson 1273772cb712SRobert Olsson /* should be called with rcu_read_lock */ 12740c7770c7SStephen Hemminger static inline int check_leaf(struct trie *t, struct leaf *l, 12750c7770c7SStephen Hemminger t_key key, int *plen, const struct flowi *flp, 127606c74270SPatrick McHardy struct fib_result *res) 127719baf839SRobert Olsson { 127806c74270SPatrick McHardy int err, i; 1279888454c5SAl Viro __be32 mask; 128019baf839SRobert Olsson struct leaf_info *li; 128119baf839SRobert Olsson struct hlist_head *hhead = &l->list; 128219baf839SRobert Olsson struct hlist_node *node; 128319baf839SRobert Olsson 12842373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, hhead, hlist) { 128519baf839SRobert Olsson i = li->plen; 1286888454c5SAl Viro mask = inet_make_mask(i); 1287888454c5SAl Viro if (l->key != (key & ntohl(mask))) 128819baf839SRobert Olsson continue; 128919baf839SRobert Olsson 1290888454c5SAl Viro if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { 129119baf839SRobert Olsson *plen = i; 129219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 129319baf839SRobert Olsson t->stats.semantic_match_passed++; 129419baf839SRobert Olsson #endif 129506c74270SPatrick McHardy return err; 129619baf839SRobert Olsson } 129719baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 129819baf839SRobert Olsson t->stats.semantic_match_miss++; 129919baf839SRobert Olsson #endif 130019baf839SRobert Olsson } 130106c74270SPatrick McHardy return 1; 130219baf839SRobert Olsson } 130319baf839SRobert Olsson 130419baf839SRobert Olsson static int 130519baf839SRobert Olsson fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 130619baf839SRobert Olsson { 130719baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 130819baf839SRobert Olsson int plen, ret = 0; 130919baf839SRobert Olsson struct node *n; 131019baf839SRobert Olsson struct tnode *pn; 131119baf839SRobert Olsson int pos, bits; 131219baf839SRobert Olsson t_key key = ntohl(flp->fl4_dst); 131319baf839SRobert Olsson int chopped_off; 131419baf839SRobert Olsson t_key cindex = 0; 131519baf839SRobert Olsson int current_prefix_length = KEYLENGTH; 131691b9a277SOlof Johansson struct tnode *cn; 131791b9a277SOlof Johansson t_key node_prefix, key_prefix, pref_mismatch; 131891b9a277SOlof Johansson int mp; 131991b9a277SOlof Johansson 13202373ce1cSRobert Olsson rcu_read_lock(); 132119baf839SRobert Olsson 13222373ce1cSRobert Olsson n = rcu_dereference(t->trie); 132319baf839SRobert Olsson if (!n) 132419baf839SRobert Olsson goto failed; 132519baf839SRobert Olsson 132619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 132719baf839SRobert Olsson t->stats.gets++; 132819baf839SRobert Olsson #endif 132919baf839SRobert Olsson 133019baf839SRobert Olsson /* Just a leaf? */ 133119baf839SRobert Olsson if (IS_LEAF(n)) { 133206c74270SPatrick McHardy if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 133319baf839SRobert Olsson goto found; 133419baf839SRobert Olsson goto failed; 133519baf839SRobert Olsson } 133619baf839SRobert Olsson pn = (struct tnode *) n; 133719baf839SRobert Olsson chopped_off = 0; 133819baf839SRobert Olsson 133919baf839SRobert Olsson while (pn) { 134019baf839SRobert Olsson pos = pn->pos; 134119baf839SRobert Olsson bits = pn->bits; 134219baf839SRobert Olsson 134319baf839SRobert Olsson if (!chopped_off) 134419baf839SRobert Olsson cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); 134519baf839SRobert Olsson 134619baf839SRobert Olsson n = tnode_get_child(pn, cindex); 134719baf839SRobert Olsson 134819baf839SRobert Olsson if (n == NULL) { 134919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 135019baf839SRobert Olsson t->stats.null_node_hit++; 135119baf839SRobert Olsson #endif 135219baf839SRobert Olsson goto backtrace; 135319baf839SRobert Olsson } 135419baf839SRobert Olsson 135591b9a277SOlof Johansson if (IS_LEAF(n)) { 135691b9a277SOlof Johansson if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 135791b9a277SOlof Johansson goto found; 135891b9a277SOlof Johansson else 135991b9a277SOlof Johansson goto backtrace; 136091b9a277SOlof Johansson } 136191b9a277SOlof Johansson 136219baf839SRobert Olsson #define HL_OPTIMIZE 136319baf839SRobert Olsson #ifdef HL_OPTIMIZE 136491b9a277SOlof Johansson cn = (struct tnode *)n; 136519baf839SRobert Olsson 136619baf839SRobert Olsson /* 136719baf839SRobert Olsson * It's a tnode, and we can do some extra checks here if we 136819baf839SRobert Olsson * like, to avoid descending into a dead-end branch. 136919baf839SRobert Olsson * This tnode is in the parent's child array at index 137019baf839SRobert Olsson * key[p_pos..p_pos+p_bits] but potentially with some bits 137119baf839SRobert Olsson * chopped off, so in reality the index may be just a 137219baf839SRobert Olsson * subprefix, padded with zero at the end. 137319baf839SRobert Olsson * We can also take a look at any skipped bits in this 137419baf839SRobert Olsson * tnode - everything up to p_pos is supposed to be ok, 137519baf839SRobert Olsson * and the non-chopped bits of the index (se previous 137619baf839SRobert Olsson * paragraph) are also guaranteed ok, but the rest is 137719baf839SRobert Olsson * considered unknown. 137819baf839SRobert Olsson * 137919baf839SRobert Olsson * The skipped bits are key[pos+bits..cn->pos]. 138019baf839SRobert Olsson */ 138119baf839SRobert Olsson 138219baf839SRobert Olsson /* If current_prefix_length < pos+bits, we are already doing 138319baf839SRobert Olsson * actual prefix matching, which means everything from 138419baf839SRobert Olsson * pos+(bits-chopped_off) onward must be zero along some 138519baf839SRobert Olsson * branch of this subtree - otherwise there is *no* valid 138619baf839SRobert Olsson * prefix present. Here we can only check the skipped 138719baf839SRobert Olsson * bits. Remember, since we have already indexed into the 138819baf839SRobert Olsson * parent's child array, we know that the bits we chopped of 138919baf839SRobert Olsson * *are* zero. 139019baf839SRobert Olsson */ 139119baf839SRobert Olsson 139219baf839SRobert Olsson /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 139319baf839SRobert Olsson 139419baf839SRobert Olsson if (current_prefix_length < pos+bits) { 139519baf839SRobert Olsson if (tkey_extract_bits(cn->key, current_prefix_length, 139619baf839SRobert Olsson cn->pos - current_prefix_length) != 0 || 139719baf839SRobert Olsson !(cn->child[0])) 139819baf839SRobert Olsson goto backtrace; 139919baf839SRobert Olsson } 140019baf839SRobert Olsson 140119baf839SRobert Olsson /* 140219baf839SRobert Olsson * If chopped_off=0, the index is fully validated and we 140319baf839SRobert Olsson * only need to look at the skipped bits for this, the new, 140419baf839SRobert Olsson * tnode. What we actually want to do is to find out if 140519baf839SRobert Olsson * these skipped bits match our key perfectly, or if we will 140619baf839SRobert Olsson * have to count on finding a matching prefix further down, 140719baf839SRobert Olsson * because if we do, we would like to have some way of 140819baf839SRobert Olsson * verifying the existence of such a prefix at this point. 140919baf839SRobert Olsson */ 141019baf839SRobert Olsson 141119baf839SRobert Olsson /* The only thing we can do at this point is to verify that 141219baf839SRobert Olsson * any such matching prefix can indeed be a prefix to our 141319baf839SRobert Olsson * key, and if the bits in the node we are inspecting that 141419baf839SRobert Olsson * do not match our key are not ZERO, this cannot be true. 141519baf839SRobert Olsson * Thus, find out where there is a mismatch (before cn->pos) 141619baf839SRobert Olsson * and verify that all the mismatching bits are zero in the 141719baf839SRobert Olsson * new tnode's key. 141819baf839SRobert Olsson */ 141919baf839SRobert Olsson 142019baf839SRobert Olsson /* Note: We aren't very concerned about the piece of the key 142119baf839SRobert Olsson * that precede pn->pos+pn->bits, since these have already been 142219baf839SRobert Olsson * checked. The bits after cn->pos aren't checked since these are 142319baf839SRobert Olsson * by definition "unknown" at this point. Thus, what we want to 142419baf839SRobert Olsson * see is if we are about to enter the "prefix matching" state, 142519baf839SRobert Olsson * and in that case verify that the skipped bits that will prevail 142619baf839SRobert Olsson * throughout this subtree are zero, as they have to be if we are 142719baf839SRobert Olsson * to find a matching prefix. 142819baf839SRobert Olsson */ 142919baf839SRobert Olsson 143019baf839SRobert Olsson node_prefix = MASK_PFX(cn->key, cn->pos); 143119baf839SRobert Olsson key_prefix = MASK_PFX(key, cn->pos); 143219baf839SRobert Olsson pref_mismatch = key_prefix^node_prefix; 143319baf839SRobert Olsson mp = 0; 143419baf839SRobert Olsson 143519baf839SRobert Olsson /* In short: If skipped bits in this node do not match the search 143619baf839SRobert Olsson * key, enter the "prefix matching" state.directly. 143719baf839SRobert Olsson */ 143819baf839SRobert Olsson if (pref_mismatch) { 143919baf839SRobert Olsson while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 144019baf839SRobert Olsson mp++; 144119baf839SRobert Olsson pref_mismatch = pref_mismatch <<1; 144219baf839SRobert Olsson } 144319baf839SRobert Olsson key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 144419baf839SRobert Olsson 144519baf839SRobert Olsson if (key_prefix != 0) 144619baf839SRobert Olsson goto backtrace; 144719baf839SRobert Olsson 144819baf839SRobert Olsson if (current_prefix_length >= cn->pos) 144919baf839SRobert Olsson current_prefix_length = mp; 145019baf839SRobert Olsson } 145119baf839SRobert Olsson #endif 145219baf839SRobert Olsson pn = (struct tnode *)n; /* Descend */ 145319baf839SRobert Olsson chopped_off = 0; 145419baf839SRobert Olsson continue; 145591b9a277SOlof Johansson 145619baf839SRobert Olsson backtrace: 145719baf839SRobert Olsson chopped_off++; 145819baf839SRobert Olsson 145919baf839SRobert Olsson /* As zero don't change the child key (cindex) */ 146091b9a277SOlof Johansson while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) 146119baf839SRobert Olsson chopped_off++; 146219baf839SRobert Olsson 146319baf839SRobert Olsson /* Decrease current_... with bits chopped off */ 146419baf839SRobert Olsson if (current_prefix_length > pn->pos + pn->bits - chopped_off) 146519baf839SRobert Olsson current_prefix_length = pn->pos + pn->bits - chopped_off; 146619baf839SRobert Olsson 146719baf839SRobert Olsson /* 146819baf839SRobert Olsson * Either we do the actual chop off according or if we have 146919baf839SRobert Olsson * chopped off all bits in this tnode walk up to our parent. 147019baf839SRobert Olsson */ 147119baf839SRobert Olsson 147291b9a277SOlof Johansson if (chopped_off <= pn->bits) { 147319baf839SRobert Olsson cindex &= ~(1 << (chopped_off-1)); 147491b9a277SOlof Johansson } else { 147519baf839SRobert Olsson if (NODE_PARENT(pn) == NULL) 147619baf839SRobert Olsson goto failed; 147719baf839SRobert Olsson 147819baf839SRobert Olsson /* Get Child's index */ 147919baf839SRobert Olsson cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); 148019baf839SRobert Olsson pn = NODE_PARENT(pn); 148119baf839SRobert Olsson chopped_off = 0; 148219baf839SRobert Olsson 148319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 148419baf839SRobert Olsson t->stats.backtrack++; 148519baf839SRobert Olsson #endif 148619baf839SRobert Olsson goto backtrace; 148719baf839SRobert Olsson } 148819baf839SRobert Olsson } 148919baf839SRobert Olsson failed: 149019baf839SRobert Olsson ret = 1; 149119baf839SRobert Olsson found: 14922373ce1cSRobert Olsson rcu_read_unlock(); 149319baf839SRobert Olsson return ret; 149419baf839SRobert Olsson } 149519baf839SRobert Olsson 14962373ce1cSRobert Olsson /* only called from updater side */ 149719baf839SRobert Olsson static int trie_leaf_remove(struct trie *t, t_key key) 149819baf839SRobert Olsson { 149919baf839SRobert Olsson t_key cindex; 150019baf839SRobert Olsson struct tnode *tp = NULL; 150119baf839SRobert Olsson struct node *n = t->trie; 150219baf839SRobert Olsson struct leaf *l; 150319baf839SRobert Olsson 15040c7770c7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", n); 150519baf839SRobert Olsson 150619baf839SRobert Olsson /* Note that in the case skipped bits, those bits are *not* checked! 150719baf839SRobert Olsson * When we finish this, we will have NULL or a T_LEAF, and the 150819baf839SRobert Olsson * T_LEAF may or may not match our key. 150919baf839SRobert Olsson */ 151019baf839SRobert Olsson 151119baf839SRobert Olsson while (n != NULL && IS_TNODE(n)) { 151219baf839SRobert Olsson struct tnode *tn = (struct tnode *) n; 151319baf839SRobert Olsson check_tnode(tn); 151419baf839SRobert Olsson n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); 151519baf839SRobert Olsson 15160c7770c7SStephen Hemminger BUG_ON(n && NODE_PARENT(n) != tn); 151719baf839SRobert Olsson } 151819baf839SRobert Olsson l = (struct leaf *) n; 151919baf839SRobert Olsson 152019baf839SRobert Olsson if (!n || !tkey_equals(l->key, key)) 152119baf839SRobert Olsson return 0; 152219baf839SRobert Olsson 152319baf839SRobert Olsson /* 152419baf839SRobert Olsson * Key found. 152519baf839SRobert Olsson * Remove the leaf and rebalance the tree 152619baf839SRobert Olsson */ 152719baf839SRobert Olsson 152819baf839SRobert Olsson t->revision++; 152919baf839SRobert Olsson t->size--; 153019baf839SRobert Olsson 15312373ce1cSRobert Olsson preempt_disable(); 153219baf839SRobert Olsson tp = NODE_PARENT(n); 153319baf839SRobert Olsson tnode_free((struct tnode *) n); 153419baf839SRobert Olsson 153519baf839SRobert Olsson if (tp) { 153619baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 153719baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, NULL); 15382373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 153991b9a277SOlof Johansson } else 15402373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 15412373ce1cSRobert Olsson preempt_enable(); 154219baf839SRobert Olsson 154319baf839SRobert Olsson return 1; 154419baf839SRobert Olsson } 154519baf839SRobert Olsson 15464e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) 154719baf839SRobert Olsson { 154819baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 154919baf839SRobert Olsson u32 key, mask; 15504e902c57SThomas Graf int plen = cfg->fc_dst_len; 15514e902c57SThomas Graf u8 tos = cfg->fc_tos; 155219baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 155319baf839SRobert Olsson struct list_head *fa_head; 155419baf839SRobert Olsson struct leaf *l; 155591b9a277SOlof Johansson struct leaf_info *li; 155691b9a277SOlof Johansson 155719baf839SRobert Olsson if (plen > 32) 155819baf839SRobert Olsson return -EINVAL; 155919baf839SRobert Olsson 15604e902c57SThomas Graf key = ntohl(cfg->fc_dst); 156119baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 156219baf839SRobert Olsson 156319baf839SRobert Olsson if (key & ~mask) 156419baf839SRobert Olsson return -EINVAL; 156519baf839SRobert Olsson 156619baf839SRobert Olsson key = key & mask; 156719baf839SRobert Olsson l = fib_find_node(t, key); 156819baf839SRobert Olsson 156919baf839SRobert Olsson if (!l) 157019baf839SRobert Olsson return -ESRCH; 157119baf839SRobert Olsson 157219baf839SRobert Olsson fa_head = get_fa_head(l, plen); 157319baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 157419baf839SRobert Olsson 157519baf839SRobert Olsson if (!fa) 157619baf839SRobert Olsson return -ESRCH; 157719baf839SRobert Olsson 15780c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 157919baf839SRobert Olsson 158019baf839SRobert Olsson fa_to_delete = NULL; 158119baf839SRobert Olsson fa_head = fa->fa_list.prev; 15822373ce1cSRobert Olsson 158319baf839SRobert Olsson list_for_each_entry(fa, fa_head, fa_list) { 158419baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 158519baf839SRobert Olsson 158619baf839SRobert Olsson if (fa->fa_tos != tos) 158719baf839SRobert Olsson break; 158819baf839SRobert Olsson 15894e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15904e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 15914e902c57SThomas Graf fa->fa_scope == cfg->fc_scope) && 15924e902c57SThomas Graf (!cfg->fc_protocol || 15934e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15944e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 159519baf839SRobert Olsson fa_to_delete = fa; 159619baf839SRobert Olsson break; 159719baf839SRobert Olsson } 159819baf839SRobert Olsson } 159919baf839SRobert Olsson 160091b9a277SOlof Johansson if (!fa_to_delete) 160191b9a277SOlof Johansson return -ESRCH; 160219baf839SRobert Olsson 160319baf839SRobert Olsson fa = fa_to_delete; 16044e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 16054e902c57SThomas Graf &cfg->fc_nlinfo); 160619baf839SRobert Olsson 160719baf839SRobert Olsson l = fib_find_node(t, key); 1608772cb712SRobert Olsson li = find_leaf_info(l, plen); 160919baf839SRobert Olsson 16102373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 161119baf839SRobert Olsson 161219baf839SRobert Olsson if (list_empty(fa_head)) { 16132373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 161419baf839SRobert Olsson free_leaf_info(li); 16152373ce1cSRobert Olsson } 161619baf839SRobert Olsson 161719baf839SRobert Olsson if (hlist_empty(&l->list)) 161819baf839SRobert Olsson trie_leaf_remove(t, key); 161919baf839SRobert Olsson 162019baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 162119baf839SRobert Olsson rt_cache_flush(-1); 162219baf839SRobert Olsson 16232373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16242373ce1cSRobert Olsson alias_free_mem_rcu(fa); 162519baf839SRobert Olsson return 0; 162619baf839SRobert Olsson } 162719baf839SRobert Olsson 162819baf839SRobert Olsson static int trie_flush_list(struct trie *t, struct list_head *head) 162919baf839SRobert Olsson { 163019baf839SRobert Olsson struct fib_alias *fa, *fa_node; 163119baf839SRobert Olsson int found = 0; 163219baf839SRobert Olsson 163319baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 163419baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 163519baf839SRobert Olsson 163619baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 16372373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 16382373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16392373ce1cSRobert Olsson alias_free_mem_rcu(fa); 164019baf839SRobert Olsson found++; 164119baf839SRobert Olsson } 164219baf839SRobert Olsson } 164319baf839SRobert Olsson return found; 164419baf839SRobert Olsson } 164519baf839SRobert Olsson 164619baf839SRobert Olsson static int trie_flush_leaf(struct trie *t, struct leaf *l) 164719baf839SRobert Olsson { 164819baf839SRobert Olsson int found = 0; 164919baf839SRobert Olsson struct hlist_head *lih = &l->list; 165019baf839SRobert Olsson struct hlist_node *node, *tmp; 165119baf839SRobert Olsson struct leaf_info *li = NULL; 165219baf839SRobert Olsson 165319baf839SRobert Olsson hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 165419baf839SRobert Olsson found += trie_flush_list(t, &li->falh); 165519baf839SRobert Olsson 165619baf839SRobert Olsson if (list_empty(&li->falh)) { 16572373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 165819baf839SRobert Olsson free_leaf_info(li); 165919baf839SRobert Olsson } 166019baf839SRobert Olsson } 166119baf839SRobert Olsson return found; 166219baf839SRobert Olsson } 166319baf839SRobert Olsson 16642373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 16652373ce1cSRobert Olsson 166619baf839SRobert Olsson static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 166719baf839SRobert Olsson { 166819baf839SRobert Olsson struct node *c = (struct node *) thisleaf; 166919baf839SRobert Olsson struct tnode *p; 167019baf839SRobert Olsson int idx; 16712373ce1cSRobert Olsson struct node *trie = rcu_dereference(t->trie); 167219baf839SRobert Olsson 167319baf839SRobert Olsson if (c == NULL) { 16742373ce1cSRobert Olsson if (trie == NULL) 167519baf839SRobert Olsson return NULL; 167619baf839SRobert Olsson 16772373ce1cSRobert Olsson if (IS_LEAF(trie)) /* trie w. just a leaf */ 16782373ce1cSRobert Olsson return (struct leaf *) trie; 167919baf839SRobert Olsson 16802373ce1cSRobert Olsson p = (struct tnode*) trie; /* Start */ 168191b9a277SOlof Johansson } else 168219baf839SRobert Olsson p = (struct tnode *) NODE_PARENT(c); 1683c877efb2SStephen Hemminger 168419baf839SRobert Olsson while (p) { 168519baf839SRobert Olsson int pos, last; 168619baf839SRobert Olsson 168719baf839SRobert Olsson /* Find the next child of the parent */ 168819baf839SRobert Olsson if (c) 168919baf839SRobert Olsson pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 169019baf839SRobert Olsson else 169119baf839SRobert Olsson pos = 0; 169219baf839SRobert Olsson 169319baf839SRobert Olsson last = 1 << p->bits; 169419baf839SRobert Olsson for (idx = pos; idx < last ; idx++) { 16952373ce1cSRobert Olsson c = rcu_dereference(p->child[idx]); 16962373ce1cSRobert Olsson 16972373ce1cSRobert Olsson if (!c) 169891b9a277SOlof Johansson continue; 169919baf839SRobert Olsson 170019baf839SRobert Olsson /* Decend if tnode */ 17012373ce1cSRobert Olsson while (IS_TNODE(c)) { 17022373ce1cSRobert Olsson p = (struct tnode *) c; 170319baf839SRobert Olsson idx = 0; 170419baf839SRobert Olsson 170519baf839SRobert Olsson /* Rightmost non-NULL branch */ 170619baf839SRobert Olsson if (p && IS_TNODE(p)) 17072373ce1cSRobert Olsson while (!(c = rcu_dereference(p->child[idx])) 17082373ce1cSRobert Olsson && idx < (1<<p->bits)) idx++; 170919baf839SRobert Olsson 171019baf839SRobert Olsson /* Done with this tnode? */ 17112373ce1cSRobert Olsson if (idx >= (1 << p->bits) || !c) 171219baf839SRobert Olsson goto up; 171319baf839SRobert Olsson } 17142373ce1cSRobert Olsson return (struct leaf *) c; 171519baf839SRobert Olsson } 171619baf839SRobert Olsson up: 171719baf839SRobert Olsson /* No more children go up one step */ 171819baf839SRobert Olsson c = (struct node *) p; 171919baf839SRobert Olsson p = (struct tnode *) NODE_PARENT(p); 172019baf839SRobert Olsson } 172119baf839SRobert Olsson return NULL; /* Ready. Root of trie */ 172219baf839SRobert Olsson } 172319baf839SRobert Olsson 172419baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb) 172519baf839SRobert Olsson { 172619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 172719baf839SRobert Olsson struct leaf *ll = NULL, *l = NULL; 172819baf839SRobert Olsson int found = 0, h; 172919baf839SRobert Olsson 173019baf839SRobert Olsson t->revision++; 173119baf839SRobert Olsson 173219baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 173319baf839SRobert Olsson found += trie_flush_leaf(t, l); 173419baf839SRobert Olsson 173519baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 173619baf839SRobert Olsson trie_leaf_remove(t, ll->key); 173719baf839SRobert Olsson ll = l; 173819baf839SRobert Olsson } 173919baf839SRobert Olsson 174019baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 174119baf839SRobert Olsson trie_leaf_remove(t, ll->key); 174219baf839SRobert Olsson 17430c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 174419baf839SRobert Olsson return found; 174519baf839SRobert Olsson } 174619baf839SRobert Olsson 174719baf839SRobert Olsson static int trie_last_dflt = -1; 174819baf839SRobert Olsson 174919baf839SRobert Olsson static void 175019baf839SRobert Olsson fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 175119baf839SRobert Olsson { 175219baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 175319baf839SRobert Olsson int order, last_idx; 175419baf839SRobert Olsson struct fib_info *fi = NULL; 175519baf839SRobert Olsson struct fib_info *last_resort; 175619baf839SRobert Olsson struct fib_alias *fa = NULL; 175719baf839SRobert Olsson struct list_head *fa_head; 175819baf839SRobert Olsson struct leaf *l; 175919baf839SRobert Olsson 176019baf839SRobert Olsson last_idx = -1; 176119baf839SRobert Olsson last_resort = NULL; 176219baf839SRobert Olsson order = -1; 176319baf839SRobert Olsson 17642373ce1cSRobert Olsson rcu_read_lock(); 176519baf839SRobert Olsson 176619baf839SRobert Olsson l = fib_find_node(t, 0); 176719baf839SRobert Olsson if (!l) 176819baf839SRobert Olsson goto out; 176919baf839SRobert Olsson 177019baf839SRobert Olsson fa_head = get_fa_head(l, 0); 177119baf839SRobert Olsson if (!fa_head) 177219baf839SRobert Olsson goto out; 177319baf839SRobert Olsson 177419baf839SRobert Olsson if (list_empty(fa_head)) 177519baf839SRobert Olsson goto out; 177619baf839SRobert Olsson 17772373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fa_head, fa_list) { 177819baf839SRobert Olsson struct fib_info *next_fi = fa->fa_info; 177919baf839SRobert Olsson 178019baf839SRobert Olsson if (fa->fa_scope != res->scope || 178119baf839SRobert Olsson fa->fa_type != RTN_UNICAST) 178219baf839SRobert Olsson continue; 178319baf839SRobert Olsson 178419baf839SRobert Olsson if (next_fi->fib_priority > res->fi->fib_priority) 178519baf839SRobert Olsson break; 178619baf839SRobert Olsson if (!next_fi->fib_nh[0].nh_gw || 178719baf839SRobert Olsson next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 178819baf839SRobert Olsson continue; 178919baf839SRobert Olsson fa->fa_state |= FA_S_ACCESSED; 179019baf839SRobert Olsson 179119baf839SRobert Olsson if (fi == NULL) { 179219baf839SRobert Olsson if (next_fi != res->fi) 179319baf839SRobert Olsson break; 179419baf839SRobert Olsson } else if (!fib_detect_death(fi, order, &last_resort, 179519baf839SRobert Olsson &last_idx, &trie_last_dflt)) { 179619baf839SRobert Olsson if (res->fi) 179719baf839SRobert Olsson fib_info_put(res->fi); 179819baf839SRobert Olsson res->fi = fi; 179919baf839SRobert Olsson atomic_inc(&fi->fib_clntref); 180019baf839SRobert Olsson trie_last_dflt = order; 180119baf839SRobert Olsson goto out; 180219baf839SRobert Olsson } 180319baf839SRobert Olsson fi = next_fi; 180419baf839SRobert Olsson order++; 180519baf839SRobert Olsson } 180619baf839SRobert Olsson if (order <= 0 || fi == NULL) { 180719baf839SRobert Olsson trie_last_dflt = -1; 180819baf839SRobert Olsson goto out; 180919baf839SRobert Olsson } 181019baf839SRobert Olsson 181119baf839SRobert Olsson if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { 181219baf839SRobert Olsson if (res->fi) 181319baf839SRobert Olsson fib_info_put(res->fi); 181419baf839SRobert Olsson res->fi = fi; 181519baf839SRobert Olsson atomic_inc(&fi->fib_clntref); 181619baf839SRobert Olsson trie_last_dflt = order; 181719baf839SRobert Olsson goto out; 181819baf839SRobert Olsson } 181919baf839SRobert Olsson if (last_idx >= 0) { 182019baf839SRobert Olsson if (res->fi) 182119baf839SRobert Olsson fib_info_put(res->fi); 182219baf839SRobert Olsson res->fi = last_resort; 182319baf839SRobert Olsson if (last_resort) 182419baf839SRobert Olsson atomic_inc(&last_resort->fib_clntref); 182519baf839SRobert Olsson } 182619baf839SRobert Olsson trie_last_dflt = last_idx; 182719baf839SRobert Olsson out:; 18282373ce1cSRobert Olsson rcu_read_unlock(); 182919baf839SRobert Olsson } 183019baf839SRobert Olsson 183119baf839SRobert Olsson static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 183219baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 183319baf839SRobert Olsson { 183419baf839SRobert Olsson int i, s_i; 183519baf839SRobert Olsson struct fib_alias *fa; 183619baf839SRobert Olsson 183732ab5f80SAl Viro __be32 xkey = htonl(key); 183819baf839SRobert Olsson 18391af5a8c4SPatrick McHardy s_i = cb->args[4]; 184019baf839SRobert Olsson i = 0; 184119baf839SRobert Olsson 18422373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 18432373ce1cSRobert Olsson 18442373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 184519baf839SRobert Olsson if (i < s_i) { 184619baf839SRobert Olsson i++; 184719baf839SRobert Olsson continue; 184819baf839SRobert Olsson } 184978c6671aSStephen Hemminger BUG_ON(!fa->fa_info); 185019baf839SRobert Olsson 185119baf839SRobert Olsson if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 185219baf839SRobert Olsson cb->nlh->nlmsg_seq, 185319baf839SRobert Olsson RTM_NEWROUTE, 185419baf839SRobert Olsson tb->tb_id, 185519baf839SRobert Olsson fa->fa_type, 185619baf839SRobert Olsson fa->fa_scope, 1857be403ea1SThomas Graf xkey, 185819baf839SRobert Olsson plen, 185919baf839SRobert Olsson fa->fa_tos, 186090f66914SDavid S. Miller fa->fa_info, 0) < 0) { 18611af5a8c4SPatrick McHardy cb->args[4] = i; 186219baf839SRobert Olsson return -1; 186319baf839SRobert Olsson } 186419baf839SRobert Olsson i++; 186519baf839SRobert Olsson } 18661af5a8c4SPatrick McHardy cb->args[4] = i; 186719baf839SRobert Olsson return skb->len; 186819baf839SRobert Olsson } 186919baf839SRobert Olsson 187019baf839SRobert Olsson static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 187119baf839SRobert Olsson struct netlink_callback *cb) 187219baf839SRobert Olsson { 187319baf839SRobert Olsson int h, s_h; 187419baf839SRobert Olsson struct list_head *fa_head; 187519baf839SRobert Olsson struct leaf *l = NULL; 187691b9a277SOlof Johansson 18771af5a8c4SPatrick McHardy s_h = cb->args[3]; 187819baf839SRobert Olsson 187919baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 188019baf839SRobert Olsson if (h < s_h) 188119baf839SRobert Olsson continue; 188219baf839SRobert Olsson if (h > s_h) 18831af5a8c4SPatrick McHardy memset(&cb->args[4], 0, 18841af5a8c4SPatrick McHardy sizeof(cb->args) - 4*sizeof(cb->args[0])); 188519baf839SRobert Olsson 188619baf839SRobert Olsson fa_head = get_fa_head(l, plen); 188719baf839SRobert Olsson 188819baf839SRobert Olsson if (!fa_head) 188919baf839SRobert Olsson continue; 189019baf839SRobert Olsson 189119baf839SRobert Olsson if (list_empty(fa_head)) 189219baf839SRobert Olsson continue; 189319baf839SRobert Olsson 189419baf839SRobert Olsson if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 18951af5a8c4SPatrick McHardy cb->args[3] = h; 189619baf839SRobert Olsson return -1; 189719baf839SRobert Olsson } 189819baf839SRobert Olsson } 18991af5a8c4SPatrick McHardy cb->args[3] = h; 190019baf839SRobert Olsson return skb->len; 190119baf839SRobert Olsson } 190219baf839SRobert Olsson 190319baf839SRobert Olsson static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 190419baf839SRobert Olsson { 190519baf839SRobert Olsson int m, s_m; 190619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 190719baf839SRobert Olsson 19081af5a8c4SPatrick McHardy s_m = cb->args[2]; 190919baf839SRobert Olsson 19102373ce1cSRobert Olsson rcu_read_lock(); 191119baf839SRobert Olsson for (m = 0; m <= 32; m++) { 191219baf839SRobert Olsson if (m < s_m) 191319baf839SRobert Olsson continue; 191419baf839SRobert Olsson if (m > s_m) 19151af5a8c4SPatrick McHardy memset(&cb->args[3], 0, 19161af5a8c4SPatrick McHardy sizeof(cb->args) - 3*sizeof(cb->args[0])); 191719baf839SRobert Olsson 191819baf839SRobert Olsson if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 19191af5a8c4SPatrick McHardy cb->args[2] = m; 192019baf839SRobert Olsson goto out; 192119baf839SRobert Olsson } 192219baf839SRobert Olsson } 19232373ce1cSRobert Olsson rcu_read_unlock(); 19241af5a8c4SPatrick McHardy cb->args[2] = m; 192519baf839SRobert Olsson return skb->len; 192619baf839SRobert Olsson out: 19272373ce1cSRobert Olsson rcu_read_unlock(); 192819baf839SRobert Olsson return -1; 192919baf839SRobert Olsson } 193019baf839SRobert Olsson 193119baf839SRobert Olsson /* Fix more generic FIB names for init later */ 193219baf839SRobert Olsson 193319baf839SRobert Olsson #ifdef CONFIG_IP_MULTIPLE_TABLES 19342dfe55b4SPatrick McHardy struct fib_table * fib_hash_init(u32 id) 193519baf839SRobert Olsson #else 19362dfe55b4SPatrick McHardy struct fib_table * __init fib_hash_init(u32 id) 193719baf839SRobert Olsson #endif 193819baf839SRobert Olsson { 193919baf839SRobert Olsson struct fib_table *tb; 194019baf839SRobert Olsson struct trie *t; 194119baf839SRobert Olsson 194219baf839SRobert Olsson if (fn_alias_kmem == NULL) 194319baf839SRobert Olsson fn_alias_kmem = kmem_cache_create("ip_fib_alias", 194419baf839SRobert Olsson sizeof(struct fib_alias), 194519baf839SRobert Olsson 0, SLAB_HWCACHE_ALIGN, 194619baf839SRobert Olsson NULL, NULL); 194719baf839SRobert Olsson 194819baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 194919baf839SRobert Olsson GFP_KERNEL); 195019baf839SRobert Olsson if (tb == NULL) 195119baf839SRobert Olsson return NULL; 195219baf839SRobert Olsson 195319baf839SRobert Olsson tb->tb_id = id; 195419baf839SRobert Olsson tb->tb_lookup = fn_trie_lookup; 195519baf839SRobert Olsson tb->tb_insert = fn_trie_insert; 195619baf839SRobert Olsson tb->tb_delete = fn_trie_delete; 195719baf839SRobert Olsson tb->tb_flush = fn_trie_flush; 195819baf839SRobert Olsson tb->tb_select_default = fn_trie_select_default; 195919baf839SRobert Olsson tb->tb_dump = fn_trie_dump; 196019baf839SRobert Olsson memset(tb->tb_data, 0, sizeof(struct trie)); 196119baf839SRobert Olsson 196219baf839SRobert Olsson t = (struct trie *) tb->tb_data; 196319baf839SRobert Olsson 196419baf839SRobert Olsson trie_init(t); 196519baf839SRobert Olsson 196619baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 196719baf839SRobert Olsson trie_local = t; 196819baf839SRobert Olsson else if (id == RT_TABLE_MAIN) 196919baf839SRobert Olsson trie_main = t; 197019baf839SRobert Olsson 197119baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 197278c6671aSStephen Hemminger printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); 197319baf839SRobert Olsson 197419baf839SRobert Olsson return tb; 197519baf839SRobert Olsson } 197619baf839SRobert Olsson 1977cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 1978cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 1979cb7b593cSStephen Hemminger struct fib_trie_iter { 1980cb7b593cSStephen Hemminger struct tnode *tnode; 1981cb7b593cSStephen Hemminger struct trie *trie; 1982cb7b593cSStephen Hemminger unsigned index; 1983cb7b593cSStephen Hemminger unsigned depth; 1984cb7b593cSStephen Hemminger }; 198519baf839SRobert Olsson 1986cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 198719baf839SRobert Olsson { 1988cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 1989cb7b593cSStephen Hemminger unsigned cindex = iter->index; 1990cb7b593cSStephen Hemminger struct tnode *p; 199119baf839SRobert Olsson 19926640e697SEric W. Biederman /* A single entry routing table */ 19936640e697SEric W. Biederman if (!tn) 19946640e697SEric W. Biederman return NULL; 19956640e697SEric W. Biederman 1996cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 1997cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 1998cb7b593cSStephen Hemminger rescan: 1999cb7b593cSStephen Hemminger while (cindex < (1<<tn->bits)) { 2000cb7b593cSStephen Hemminger struct node *n = tnode_get_child(tn, cindex); 200119baf839SRobert Olsson 2002cb7b593cSStephen Hemminger if (n) { 200319baf839SRobert Olsson if (IS_LEAF(n)) { 2004cb7b593cSStephen Hemminger iter->tnode = tn; 2005cb7b593cSStephen Hemminger iter->index = cindex + 1; 200691b9a277SOlof Johansson } else { 2007cb7b593cSStephen Hemminger /* push down one level */ 2008cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2009cb7b593cSStephen Hemminger iter->index = 0; 2010cb7b593cSStephen Hemminger ++iter->depth; 201119baf839SRobert Olsson } 2012cb7b593cSStephen Hemminger return n; 201319baf839SRobert Olsson } 201419baf839SRobert Olsson 2015cb7b593cSStephen Hemminger ++cindex; 2016cb7b593cSStephen Hemminger } 2017cb7b593cSStephen Hemminger 2018cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 2019cb7b593cSStephen Hemminger p = NODE_PARENT(tn); 2020cb7b593cSStephen Hemminger if (p) { 2021cb7b593cSStephen Hemminger cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2022cb7b593cSStephen Hemminger tn = p; 2023cb7b593cSStephen Hemminger --iter->depth; 2024cb7b593cSStephen Hemminger goto rescan; 2025cb7b593cSStephen Hemminger } 2026cb7b593cSStephen Hemminger 2027cb7b593cSStephen Hemminger /* got root? */ 2028cb7b593cSStephen Hemminger return NULL; 2029cb7b593cSStephen Hemminger } 2030cb7b593cSStephen Hemminger 2031cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2032cb7b593cSStephen Hemminger struct trie *t) 2033cb7b593cSStephen Hemminger { 20345ddf0eb2SRobert Olsson struct node *n ; 20355ddf0eb2SRobert Olsson 20365ddf0eb2SRobert Olsson if(!t) 20375ddf0eb2SRobert Olsson return NULL; 20385ddf0eb2SRobert Olsson 20395ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 20405ddf0eb2SRobert Olsson 20415ddf0eb2SRobert Olsson if(!iter) 20425ddf0eb2SRobert Olsson return NULL; 2043cb7b593cSStephen Hemminger 20446640e697SEric W. Biederman if (n) { 20456640e697SEric W. Biederman if (IS_TNODE(n)) { 2046cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2047cb7b593cSStephen Hemminger iter->trie = t; 2048cb7b593cSStephen Hemminger iter->index = 0; 20491d25cd6cSRobert Olsson iter->depth = 1; 20506640e697SEric W. Biederman } else { 20516640e697SEric W. Biederman iter->tnode = NULL; 20526640e697SEric W. Biederman iter->trie = t; 20536640e697SEric W. Biederman iter->index = 0; 20546640e697SEric W. Biederman iter->depth = 0; 20556640e697SEric W. Biederman } 2056cb7b593cSStephen Hemminger return n; 2057cb7b593cSStephen Hemminger } 2058cb7b593cSStephen Hemminger return NULL; 2059cb7b593cSStephen Hemminger } 2060cb7b593cSStephen Hemminger 2061cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 206219baf839SRobert Olsson { 20632373ce1cSRobert Olsson struct node *n; 2064cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2065cb7b593cSStephen Hemminger 2066cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 206719baf839SRobert Olsson 20682373ce1cSRobert Olsson rcu_read_lock(); 2069cb7b593cSStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; 2070cb7b593cSStephen Hemminger n = fib_trie_get_next(&iter)) { 2071cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 207219baf839SRobert Olsson s->leaves++; 2073cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2074cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2075cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 207691b9a277SOlof Johansson } else { 2077cb7b593cSStephen Hemminger const struct tnode *tn = (const struct tnode *) n; 2078cb7b593cSStephen Hemminger int i; 207919baf839SRobert Olsson 208019baf839SRobert Olsson s->tnodes++; 208106ef921dSRobert Olsson if(tn->bits < MAX_STAT_DEPTH) 208219baf839SRobert Olsson s->nodesizes[tn->bits]++; 208306ef921dSRobert Olsson 2084cb7b593cSStephen Hemminger for (i = 0; i < (1<<tn->bits); i++) 2085cb7b593cSStephen Hemminger if (!tn->child[i]) 208619baf839SRobert Olsson s->nullpointers++; 208719baf839SRobert Olsson } 208819baf839SRobert Olsson } 20892373ce1cSRobert Olsson rcu_read_unlock(); 209019baf839SRobert Olsson } 209119baf839SRobert Olsson 209219baf839SRobert Olsson /* 209319baf839SRobert Olsson * This outputs /proc/net/fib_triestats 209419baf839SRobert Olsson */ 2095cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 209619baf839SRobert Olsson { 2097cb7b593cSStephen Hemminger unsigned i, max, pointers, bytes, avdepth; 209819baf839SRobert Olsson 209919baf839SRobert Olsson if (stat->leaves) 210019baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 210119baf839SRobert Olsson else 210219baf839SRobert Olsson avdepth = 0; 210319baf839SRobert Olsson 2104cb7b593cSStephen Hemminger seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2105cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2106cb7b593cSStephen Hemminger 2107cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2108cb7b593cSStephen Hemminger 2109cb7b593cSStephen Hemminger bytes = sizeof(struct leaf) * stat->leaves; 2110cb7b593cSStephen Hemminger seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); 211119baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 211219baf839SRobert Olsson 211306ef921dSRobert Olsson max = MAX_STAT_DEPTH; 211406ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 211519baf839SRobert Olsson max--; 211619baf839SRobert Olsson 2117cb7b593cSStephen Hemminger pointers = 0; 211819baf839SRobert Olsson for (i = 1; i <= max; i++) 211919baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 212019baf839SRobert Olsson seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 212119baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 212219baf839SRobert Olsson } 2123cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2124cb7b593cSStephen Hemminger seq_printf(seq, "\tPointers: %d\n", pointers); 2125cb7b593cSStephen Hemminger 212619baf839SRobert Olsson bytes += sizeof(struct node *) * pointers; 212719baf839SRobert Olsson seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2128cb7b593cSStephen Hemminger seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); 212919baf839SRobert Olsson 213019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 213119baf839SRobert Olsson seq_printf(seq, "Counters:\n---------\n"); 213219baf839SRobert Olsson seq_printf(seq,"gets = %d\n", t->stats.gets); 213319baf839SRobert Olsson seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 213419baf839SRobert Olsson seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 213519baf839SRobert Olsson seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 213619baf839SRobert Olsson seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 21372f36895aSRobert Olsson seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); 213819baf839SRobert Olsson #ifdef CLEAR_STATS 213919baf839SRobert Olsson memset(&(t->stats), 0, sizeof(t->stats)); 214019baf839SRobert Olsson #endif 214119baf839SRobert Olsson #endif /* CONFIG_IP_FIB_TRIE_STATS */ 214219baf839SRobert Olsson } 214319baf839SRobert Olsson 214419baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 214519baf839SRobert Olsson { 2146cb7b593cSStephen Hemminger struct trie_stat *stat; 214719baf839SRobert Olsson 2148cb7b593cSStephen Hemminger stat = kmalloc(sizeof(*stat), GFP_KERNEL); 2149cb7b593cSStephen Hemminger if (!stat) 2150cb7b593cSStephen Hemminger return -ENOMEM; 2151cb7b593cSStephen Hemminger 215219baf839SRobert Olsson seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 215319baf839SRobert Olsson sizeof(struct leaf), sizeof(struct tnode)); 215419baf839SRobert Olsson 2155cb7b593cSStephen Hemminger if (trie_local) { 2156cb7b593cSStephen Hemminger seq_printf(seq, "Local:\n"); 2157cb7b593cSStephen Hemminger trie_collect_stats(trie_local, stat); 2158cb7b593cSStephen Hemminger trie_show_stats(seq, stat); 215919baf839SRobert Olsson } 2160cb7b593cSStephen Hemminger 2161cb7b593cSStephen Hemminger if (trie_main) { 2162cb7b593cSStephen Hemminger seq_printf(seq, "Main:\n"); 2163cb7b593cSStephen Hemminger trie_collect_stats(trie_main, stat); 2164cb7b593cSStephen Hemminger trie_show_stats(seq, stat); 2165cb7b593cSStephen Hemminger } 2166cb7b593cSStephen Hemminger kfree(stat); 2167cb7b593cSStephen Hemminger 216819baf839SRobert Olsson return 0; 216919baf839SRobert Olsson } 217019baf839SRobert Olsson 217119baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 217219baf839SRobert Olsson { 2173cb7b593cSStephen Hemminger return single_open(file, fib_triestat_seq_show, NULL); 217419baf839SRobert Olsson } 217519baf839SRobert Olsson 2176*9a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 217719baf839SRobert Olsson .owner = THIS_MODULE, 217819baf839SRobert Olsson .open = fib_triestat_seq_open, 217919baf839SRobert Olsson .read = seq_read, 218019baf839SRobert Olsson .llseek = seq_lseek, 2181cb7b593cSStephen Hemminger .release = single_release, 218219baf839SRobert Olsson }; 218319baf839SRobert Olsson 2184cb7b593cSStephen Hemminger static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2185cb7b593cSStephen Hemminger loff_t pos) 218619baf839SRobert Olsson { 2187cb7b593cSStephen Hemminger loff_t idx = 0; 2188cb7b593cSStephen Hemminger struct node *n; 2189cb7b593cSStephen Hemminger 2190cb7b593cSStephen Hemminger for (n = fib_trie_get_first(iter, trie_local); 2191cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2192cb7b593cSStephen Hemminger if (pos == idx) 2193cb7b593cSStephen Hemminger return n; 219419baf839SRobert Olsson } 219519baf839SRobert Olsson 2196cb7b593cSStephen Hemminger for (n = fib_trie_get_first(iter, trie_main); 2197cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2198cb7b593cSStephen Hemminger if (pos == idx) 2199cb7b593cSStephen Hemminger return n; 220019baf839SRobert Olsson } 220119baf839SRobert Olsson return NULL; 220219baf839SRobert Olsson } 220319baf839SRobert Olsson 220419baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 220519baf839SRobert Olsson { 2206cb7b593cSStephen Hemminger rcu_read_lock(); 2207cb7b593cSStephen Hemminger if (*pos == 0) 220891b9a277SOlof Johansson return SEQ_START_TOKEN; 2209cb7b593cSStephen Hemminger return fib_trie_get_idx(seq->private, *pos - 1); 221019baf839SRobert Olsson } 221119baf839SRobert Olsson 221219baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 221319baf839SRobert Olsson { 2214cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 2215cb7b593cSStephen Hemminger void *l = v; 2216cb7b593cSStephen Hemminger 221719baf839SRobert Olsson ++*pos; 221891b9a277SOlof Johansson if (v == SEQ_START_TOKEN) 2219cb7b593cSStephen Hemminger return fib_trie_get_idx(iter, 0); 222091b9a277SOlof Johansson 2221cb7b593cSStephen Hemminger v = fib_trie_get_next(iter); 2222cb7b593cSStephen Hemminger BUG_ON(v == l); 2223cb7b593cSStephen Hemminger if (v) 2224cb7b593cSStephen Hemminger return v; 2225cb7b593cSStephen Hemminger 2226cb7b593cSStephen Hemminger /* continue scan in next trie */ 2227cb7b593cSStephen Hemminger if (iter->trie == trie_local) 2228cb7b593cSStephen Hemminger return fib_trie_get_first(iter, trie_main); 2229cb7b593cSStephen Hemminger 2230cb7b593cSStephen Hemminger return NULL; 223119baf839SRobert Olsson } 223219baf839SRobert Olsson 223319baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 223419baf839SRobert Olsson { 2235cb7b593cSStephen Hemminger rcu_read_unlock(); 223619baf839SRobert Olsson } 223719baf839SRobert Olsson 2238cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2239cb7b593cSStephen Hemminger { 2240cb7b593cSStephen Hemminger while (n-- > 0) seq_puts(seq, " "); 2241cb7b593cSStephen Hemminger } 224219baf839SRobert Olsson 2243cb7b593cSStephen Hemminger static inline const char *rtn_scope(enum rt_scope_t s) 2244cb7b593cSStephen Hemminger { 2245cb7b593cSStephen Hemminger static char buf[32]; 2246cb7b593cSStephen Hemminger 2247cb7b593cSStephen Hemminger switch(s) { 2248cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2249cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2250cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2251cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2252cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2253cb7b593cSStephen Hemminger default: 2254cb7b593cSStephen Hemminger snprintf(buf, sizeof(buf), "scope=%d", s); 2255cb7b593cSStephen Hemminger return buf; 2256cb7b593cSStephen Hemminger } 2257cb7b593cSStephen Hemminger } 2258cb7b593cSStephen Hemminger 2259cb7b593cSStephen Hemminger static const char *rtn_type_names[__RTN_MAX] = { 2260cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2261cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2262cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2263cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2264cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2265cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2266cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2267cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2268cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2269cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2270cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2271cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2272cb7b593cSStephen Hemminger }; 2273cb7b593cSStephen Hemminger 2274cb7b593cSStephen Hemminger static inline const char *rtn_type(unsigned t) 2275cb7b593cSStephen Hemminger { 2276cb7b593cSStephen Hemminger static char buf[32]; 2277cb7b593cSStephen Hemminger 2278cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2279cb7b593cSStephen Hemminger return rtn_type_names[t]; 2280cb7b593cSStephen Hemminger snprintf(buf, sizeof(buf), "type %d", t); 2281cb7b593cSStephen Hemminger return buf; 2282cb7b593cSStephen Hemminger } 2283cb7b593cSStephen Hemminger 2284cb7b593cSStephen Hemminger /* Pretty print the trie */ 228519baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 228619baf839SRobert Olsson { 2287cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2288cb7b593cSStephen Hemminger struct node *n = v; 228919baf839SRobert Olsson 2290cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) 2291cb7b593cSStephen Hemminger return 0; 229219baf839SRobert Olsson 2293cb7b593cSStephen Hemminger if (!NODE_PARENT(n)) { 2294cb7b593cSStephen Hemminger if (iter->trie == trie_local) 2295cb7b593cSStephen Hemminger seq_puts(seq, "<local>:\n"); 2296cb7b593cSStephen Hemminger else 2297cb7b593cSStephen Hemminger seq_puts(seq, "<main>:\n"); 2298cb7b593cSStephen Hemminger } 2299095b8501SRobert Olsson 2300095b8501SRobert Olsson if (IS_TNODE(n)) { 2301095b8501SRobert Olsson struct tnode *tn = (struct tnode *) n; 2302095b8501SRobert Olsson __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); 2303095b8501SRobert Olsson 23041d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 23051d25cd6cSRobert Olsson seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", 23061d25cd6cSRobert Olsson NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 23071d25cd6cSRobert Olsson tn->empty_children); 23081d25cd6cSRobert Olsson 2309cb7b593cSStephen Hemminger } else { 2310cb7b593cSStephen Hemminger struct leaf *l = (struct leaf *) n; 2311cb7b593cSStephen Hemminger int i; 231232ab5f80SAl Viro __be32 val = htonl(l->key); 2313cb7b593cSStephen Hemminger 2314cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2315cb7b593cSStephen Hemminger seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2316cb7b593cSStephen Hemminger for (i = 32; i >= 0; i--) { 2317772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, i); 2318cb7b593cSStephen Hemminger if (li) { 2319cb7b593cSStephen Hemminger struct fib_alias *fa; 2320cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2321cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 2322cb7b593cSStephen Hemminger seq_printf(seq, " /%d %s %s", i, 2323cb7b593cSStephen Hemminger rtn_scope(fa->fa_scope), 2324cb7b593cSStephen Hemminger rtn_type(fa->fa_type)); 2325cb7b593cSStephen Hemminger if (fa->fa_tos) 2326cb7b593cSStephen Hemminger seq_printf(seq, "tos =%d\n", 2327cb7b593cSStephen Hemminger fa->fa_tos); 2328cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2329cb7b593cSStephen Hemminger } 2330cb7b593cSStephen Hemminger } 2331cb7b593cSStephen Hemminger } 233219baf839SRobert Olsson } 233319baf839SRobert Olsson 233419baf839SRobert Olsson return 0; 233519baf839SRobert Olsson } 233619baf839SRobert Olsson 233719baf839SRobert Olsson static struct seq_operations fib_trie_seq_ops = { 233819baf839SRobert Olsson .start = fib_trie_seq_start, 233919baf839SRobert Olsson .next = fib_trie_seq_next, 234019baf839SRobert Olsson .stop = fib_trie_seq_stop, 234119baf839SRobert Olsson .show = fib_trie_seq_show, 234219baf839SRobert Olsson }; 234319baf839SRobert Olsson 234419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 234519baf839SRobert Olsson { 234619baf839SRobert Olsson struct seq_file *seq; 234719baf839SRobert Olsson int rc = -ENOMEM; 2348cb7b593cSStephen Hemminger struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 2349cb7b593cSStephen Hemminger 2350cb7b593cSStephen Hemminger if (!s) 2351cb7b593cSStephen Hemminger goto out; 235219baf839SRobert Olsson 235319baf839SRobert Olsson rc = seq_open(file, &fib_trie_seq_ops); 235419baf839SRobert Olsson if (rc) 235519baf839SRobert Olsson goto out_kfree; 235619baf839SRobert Olsson 235719baf839SRobert Olsson seq = file->private_data; 2358cb7b593cSStephen Hemminger seq->private = s; 2359cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 236019baf839SRobert Olsson out: 236119baf839SRobert Olsson return rc; 236219baf839SRobert Olsson out_kfree: 2363cb7b593cSStephen Hemminger kfree(s); 236419baf839SRobert Olsson goto out; 236519baf839SRobert Olsson } 236619baf839SRobert Olsson 2367*9a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 236819baf839SRobert Olsson .owner = THIS_MODULE, 236919baf839SRobert Olsson .open = fib_trie_seq_open, 237019baf839SRobert Olsson .read = seq_read, 237119baf839SRobert Olsson .llseek = seq_lseek, 237219baf839SRobert Olsson .release = seq_release_private, 237319baf839SRobert Olsson }; 237419baf839SRobert Olsson 237532ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2376cb7b593cSStephen Hemminger { 2377cb7b593cSStephen Hemminger static unsigned type2flags[RTN_MAX + 1] = { 2378cb7b593cSStephen Hemminger [7] = RTF_REJECT, [8] = RTF_REJECT, 2379cb7b593cSStephen Hemminger }; 2380cb7b593cSStephen Hemminger unsigned flags = type2flags[type]; 2381cb7b593cSStephen Hemminger 2382cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2383cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 238432ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2385cb7b593cSStephen Hemminger flags |= RTF_HOST; 2386cb7b593cSStephen Hemminger flags |= RTF_UP; 2387cb7b593cSStephen Hemminger return flags; 2388cb7b593cSStephen Hemminger } 2389cb7b593cSStephen Hemminger 2390cb7b593cSStephen Hemminger /* 2391cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2392cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2393cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2394cb7b593cSStephen Hemminger * legacy utilities 2395cb7b593cSStephen Hemminger */ 2396cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2397cb7b593cSStephen Hemminger { 2398c9e53cbeSPatrick McHardy const struct fib_trie_iter *iter = seq->private; 2399cb7b593cSStephen Hemminger struct leaf *l = v; 2400cb7b593cSStephen Hemminger int i; 2401cb7b593cSStephen Hemminger char bf[128]; 2402cb7b593cSStephen Hemminger 2403cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2404cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2405cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2406cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2407cb7b593cSStephen Hemminger return 0; 2408cb7b593cSStephen Hemminger } 2409cb7b593cSStephen Hemminger 2410c9e53cbeSPatrick McHardy if (iter->trie == trie_local) 2411c9e53cbeSPatrick McHardy return 0; 2412cb7b593cSStephen Hemminger if (IS_TNODE(l)) 2413cb7b593cSStephen Hemminger return 0; 2414cb7b593cSStephen Hemminger 2415cb7b593cSStephen Hemminger for (i=32; i>=0; i--) { 2416772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, i); 2417cb7b593cSStephen Hemminger struct fib_alias *fa; 241832ab5f80SAl Viro __be32 mask, prefix; 2419cb7b593cSStephen Hemminger 2420cb7b593cSStephen Hemminger if (!li) 2421cb7b593cSStephen Hemminger continue; 2422cb7b593cSStephen Hemminger 2423cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2424cb7b593cSStephen Hemminger prefix = htonl(l->key); 2425cb7b593cSStephen Hemminger 2426cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 24271371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2428cb7b593cSStephen Hemminger unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2429cb7b593cSStephen Hemminger 2430cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2431cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2432cb7b593cSStephen Hemminger continue; 2433cb7b593cSStephen Hemminger 2434cb7b593cSStephen Hemminger if (fi) 2435cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2436cb7b593cSStephen Hemminger "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2437cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2438cb7b593cSStephen Hemminger prefix, 2439cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2440cb7b593cSStephen Hemminger fi->fib_priority, 2441cb7b593cSStephen Hemminger mask, 2442cb7b593cSStephen Hemminger (fi->fib_advmss ? fi->fib_advmss + 40 : 0), 2443cb7b593cSStephen Hemminger fi->fib_window, 2444cb7b593cSStephen Hemminger fi->fib_rtt >> 3); 2445cb7b593cSStephen Hemminger else 2446cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2447cb7b593cSStephen Hemminger "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2448cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2449cb7b593cSStephen Hemminger mask, 0, 0, 0); 2450cb7b593cSStephen Hemminger 2451cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", bf); 2452cb7b593cSStephen Hemminger } 2453cb7b593cSStephen Hemminger } 2454cb7b593cSStephen Hemminger 2455cb7b593cSStephen Hemminger return 0; 2456cb7b593cSStephen Hemminger } 2457cb7b593cSStephen Hemminger 2458cb7b593cSStephen Hemminger static struct seq_operations fib_route_seq_ops = { 2459cb7b593cSStephen Hemminger .start = fib_trie_seq_start, 2460cb7b593cSStephen Hemminger .next = fib_trie_seq_next, 2461cb7b593cSStephen Hemminger .stop = fib_trie_seq_stop, 2462cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2463cb7b593cSStephen Hemminger }; 2464cb7b593cSStephen Hemminger 2465cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2466cb7b593cSStephen Hemminger { 2467cb7b593cSStephen Hemminger struct seq_file *seq; 2468cb7b593cSStephen Hemminger int rc = -ENOMEM; 2469cb7b593cSStephen Hemminger struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); 2470cb7b593cSStephen Hemminger 2471cb7b593cSStephen Hemminger if (!s) 2472cb7b593cSStephen Hemminger goto out; 2473cb7b593cSStephen Hemminger 2474cb7b593cSStephen Hemminger rc = seq_open(file, &fib_route_seq_ops); 2475cb7b593cSStephen Hemminger if (rc) 2476cb7b593cSStephen Hemminger goto out_kfree; 2477cb7b593cSStephen Hemminger 2478cb7b593cSStephen Hemminger seq = file->private_data; 2479cb7b593cSStephen Hemminger seq->private = s; 2480cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 2481cb7b593cSStephen Hemminger out: 2482cb7b593cSStephen Hemminger return rc; 2483cb7b593cSStephen Hemminger out_kfree: 2484cb7b593cSStephen Hemminger kfree(s); 2485cb7b593cSStephen Hemminger goto out; 2486cb7b593cSStephen Hemminger } 2487cb7b593cSStephen Hemminger 2488*9a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2489cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2490cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2491cb7b593cSStephen Hemminger .read = seq_read, 2492cb7b593cSStephen Hemminger .llseek = seq_lseek, 2493cb7b593cSStephen Hemminger .release = seq_release_private, 2494cb7b593cSStephen Hemminger }; 2495cb7b593cSStephen Hemminger 249619baf839SRobert Olsson int __init fib_proc_init(void) 249719baf839SRobert Olsson { 2498cb7b593cSStephen Hemminger if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) 2499cb7b593cSStephen Hemminger goto out1; 2500cb7b593cSStephen Hemminger 2501cb7b593cSStephen Hemminger if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) 2502cb7b593cSStephen Hemminger goto out2; 2503cb7b593cSStephen Hemminger 2504cb7b593cSStephen Hemminger if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) 2505cb7b593cSStephen Hemminger goto out3; 2506cb7b593cSStephen Hemminger 250719baf839SRobert Olsson return 0; 2508cb7b593cSStephen Hemminger 2509cb7b593cSStephen Hemminger out3: 2510cb7b593cSStephen Hemminger proc_net_remove("fib_triestat"); 2511cb7b593cSStephen Hemminger out2: 2512cb7b593cSStephen Hemminger proc_net_remove("fib_trie"); 2513cb7b593cSStephen Hemminger out1: 2514cb7b593cSStephen Hemminger return -ENOMEM; 251519baf839SRobert Olsson } 251619baf839SRobert Olsson 251719baf839SRobert Olsson void __init fib_proc_exit(void) 251819baf839SRobert Olsson { 251919baf839SRobert Olsson proc_net_remove("fib_trie"); 2520cb7b593cSStephen Hemminger proc_net_remove("fib_triestat"); 2521cb7b593cSStephen Hemminger proc_net_remove("route"); 252219baf839SRobert Olsson } 252319baf839SRobert Olsson 252419baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2525