119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1519baf839SRobert Olsson * This work is based on the LPC-trie which is originally descibed in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 1919baf839SRobert Olsson * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ 2619baf839SRobert Olsson * 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2919baf839SRobert Olsson * 3019baf839SRobert Olsson * 3119baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3219baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3319baf839SRobert Olsson * interface as the means of communication with the user level. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3619baf839SRobert Olsson * 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3919baf839SRobert Olsson * 4019baf839SRobert Olsson * This program is free software; you can redistribute it and/or 4119baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4219baf839SRobert Olsson * as published by the Free Software Foundation; either version 4319baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * Substantial contributions to this work comes from: 46fd966255SRobert Olsson * 47fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 48fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 49fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 50fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 5119baf839SRobert Olsson */ 5219baf839SRobert Olsson 5305eee48cSRobert Olsson #define VERSION "0.408" 5419baf839SRobert Olsson 5519baf839SRobert Olsson #include <asm/uaccess.h> 5619baf839SRobert Olsson #include <asm/system.h> 571977f032SJiri Slaby #include <linux/bitops.h> 5819baf839SRobert Olsson #include <linux/types.h> 5919baf839SRobert Olsson #include <linux/kernel.h> 6019baf839SRobert Olsson #include <linux/mm.h> 6119baf839SRobert Olsson #include <linux/string.h> 6219baf839SRobert Olsson #include <linux/socket.h> 6319baf839SRobert Olsson #include <linux/sockios.h> 6419baf839SRobert Olsson #include <linux/errno.h> 6519baf839SRobert Olsson #include <linux/in.h> 6619baf839SRobert Olsson #include <linux/inet.h> 67cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6819baf839SRobert Olsson #include <linux/netdevice.h> 6919baf839SRobert Olsson #include <linux/if_arp.h> 7019baf839SRobert Olsson #include <linux/proc_fs.h> 712373ce1cSRobert Olsson #include <linux/rcupdate.h> 7219baf839SRobert Olsson #include <linux/skbuff.h> 7319baf839SRobert Olsson #include <linux/netlink.h> 7419baf839SRobert Olsson #include <linux/init.h> 7519baf839SRobert Olsson #include <linux/list.h> 76457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7719baf839SRobert Olsson #include <net/ip.h> 7819baf839SRobert Olsson #include <net/protocol.h> 7919baf839SRobert Olsson #include <net/route.h> 8019baf839SRobert Olsson #include <net/tcp.h> 8119baf839SRobert Olsson #include <net/sock.h> 8219baf839SRobert Olsson #include <net/ip_fib.h> 8319baf839SRobert Olsson #include "fib_lookup.h" 8419baf839SRobert Olsson 8506ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8619baf839SRobert Olsson 8719baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8819baf839SRobert Olsson 8919baf839SRobert Olsson typedef unsigned int t_key; 9019baf839SRobert Olsson 9119baf839SRobert Olsson #define T_TNODE 0 9219baf839SRobert Olsson #define T_LEAF 1 9319baf839SRobert Olsson #define NODE_TYPE_MASK 0x1UL 942373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) 952373ce1cSRobert Olsson 9691b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF)) 9791b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF) 9819baf839SRobert Olsson 9919baf839SRobert Olsson struct node { 10091b9a277SOlof Johansson unsigned long parent; 1018d965444SEric Dumazet t_key key; 10219baf839SRobert Olsson }; 10319baf839SRobert Olsson 10419baf839SRobert Olsson struct leaf { 10591b9a277SOlof Johansson unsigned long parent; 1068d965444SEric Dumazet t_key key; 10719baf839SRobert Olsson struct hlist_head list; 1082373ce1cSRobert Olsson struct rcu_head rcu; 10919baf839SRobert Olsson }; 11019baf839SRobert Olsson 11119baf839SRobert Olsson struct leaf_info { 11219baf839SRobert Olsson struct hlist_node hlist; 1132373ce1cSRobert Olsson struct rcu_head rcu; 11419baf839SRobert Olsson int plen; 11519baf839SRobert Olsson struct list_head falh; 11619baf839SRobert Olsson }; 11719baf839SRobert Olsson 11819baf839SRobert Olsson struct tnode { 11991b9a277SOlof Johansson unsigned long parent; 1208d965444SEric Dumazet t_key key; 121112d8cfcSEric Dumazet unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 122112d8cfcSEric Dumazet unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 1238d965444SEric Dumazet unsigned int full_children; /* KEYLENGTH bits needed */ 1248d965444SEric Dumazet unsigned int empty_children; /* KEYLENGTH bits needed */ 1252373ce1cSRobert Olsson struct rcu_head rcu; 12619baf839SRobert Olsson struct node *child[0]; 12719baf839SRobert Olsson }; 12819baf839SRobert Olsson 12919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13019baf839SRobert Olsson struct trie_use_stats { 13119baf839SRobert Olsson unsigned int gets; 13219baf839SRobert Olsson unsigned int backtrack; 13319baf839SRobert Olsson unsigned int semantic_match_passed; 13419baf839SRobert Olsson unsigned int semantic_match_miss; 13519baf839SRobert Olsson unsigned int null_node_hit; 1362f36895aSRobert Olsson unsigned int resize_node_skipped; 13719baf839SRobert Olsson }; 13819baf839SRobert Olsson #endif 13919baf839SRobert Olsson 14019baf839SRobert Olsson struct trie_stat { 14119baf839SRobert Olsson unsigned int totdepth; 14219baf839SRobert Olsson unsigned int maxdepth; 14319baf839SRobert Olsson unsigned int tnodes; 14419baf839SRobert Olsson unsigned int leaves; 14519baf839SRobert Olsson unsigned int nullpointers; 14606ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 14719baf839SRobert Olsson }; 14819baf839SRobert Olsson 14919baf839SRobert Olsson struct trie { 15019baf839SRobert Olsson struct node *trie; 151d717a9a6SStephen Hemminger unsigned int size; 15219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 15319baf839SRobert Olsson struct trie_use_stats stats; 15419baf839SRobert Olsson #endif 15519baf839SRobert Olsson }; 15619baf839SRobert Olsson 15719baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 15819baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 15919baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn); 1602f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn); 1612f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn); 16219baf839SRobert Olsson static void tnode_free(struct tnode *tn); 16319baf839SRobert Olsson 164e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 16519baf839SRobert Olsson 16606801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node) 16706801916SStephen Hemminger { 168b59cfbf7SEric Dumazet return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 169b59cfbf7SEric Dumazet } 17006801916SStephen Hemminger 171b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node) 172b59cfbf7SEric Dumazet { 173b59cfbf7SEric Dumazet struct tnode *ret = node_parent(node); 174b59cfbf7SEric Dumazet 17506801916SStephen Hemminger return rcu_dereference(ret); 17606801916SStephen Hemminger } 17706801916SStephen Hemminger 17806801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr) 17906801916SStephen Hemminger { 18006801916SStephen Hemminger rcu_assign_pointer(node->parent, 18106801916SStephen Hemminger (unsigned long)ptr | NODE_TYPE(node)); 18206801916SStephen Hemminger } 1832373ce1cSRobert Olsson 184b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 18519baf839SRobert Olsson { 186b59cfbf7SEric Dumazet BUG_ON(i >= 1U << tn->bits); 18719baf839SRobert Olsson 188b59cfbf7SEric Dumazet return tn->child[i]; 189b59cfbf7SEric Dumazet } 190b59cfbf7SEric Dumazet 191b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 192b59cfbf7SEric Dumazet { 193b59cfbf7SEric Dumazet struct node *ret = tnode_get_child(tn, i); 194b59cfbf7SEric Dumazet 195b59cfbf7SEric Dumazet return rcu_dereference(ret); 19619baf839SRobert Olsson } 19719baf839SRobert Olsson 198bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn) 19919baf839SRobert Olsson { 20019baf839SRobert Olsson return 1 << tn->bits; 20119baf839SRobert Olsson } 20219baf839SRobert Olsson 203ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l) 204ab66b4a7SStephen Hemminger { 205ab66b4a7SStephen Hemminger return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 206ab66b4a7SStephen Hemminger } 207ab66b4a7SStephen Hemminger 20819baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 20919baf839SRobert Olsson { 21019baf839SRobert Olsson if (offset < KEYLENGTH) 21119baf839SRobert Olsson return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 21219baf839SRobert Olsson else 21319baf839SRobert Olsson return 0; 21419baf839SRobert Olsson } 21519baf839SRobert Olsson 21619baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b) 21719baf839SRobert Olsson { 21819baf839SRobert Olsson return a == b; 21919baf839SRobert Olsson } 22019baf839SRobert Olsson 22119baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 22219baf839SRobert Olsson { 22319baf839SRobert Olsson if (bits == 0 || offset >= KEYLENGTH) 22419baf839SRobert Olsson return 1; 22519baf839SRobert Olsson bits = bits > KEYLENGTH ? KEYLENGTH : bits; 22619baf839SRobert Olsson return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 22719baf839SRobert Olsson } 22819baf839SRobert Olsson 22919baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b) 23019baf839SRobert Olsson { 23119baf839SRobert Olsson t_key diff = a ^ b; 23219baf839SRobert Olsson int i = offset; 23319baf839SRobert Olsson 23419baf839SRobert Olsson if (!diff) 23519baf839SRobert Olsson return 0; 23619baf839SRobert Olsson while ((diff << i) >> (KEYLENGTH-1) == 0) 23719baf839SRobert Olsson i++; 23819baf839SRobert Olsson return i; 23919baf839SRobert Olsson } 24019baf839SRobert Olsson 24119baf839SRobert Olsson /* 24219baf839SRobert Olsson To understand this stuff, an understanding of keys and all their bits is 24319baf839SRobert Olsson necessary. Every node in the trie has a key associated with it, but not 24419baf839SRobert Olsson all of the bits in that key are significant. 24519baf839SRobert Olsson 24619baf839SRobert Olsson Consider a node 'n' and its parent 'tp'. 24719baf839SRobert Olsson 24819baf839SRobert Olsson If n is a leaf, every bit in its key is significant. Its presence is 249772cb712SRobert Olsson necessitated by path compression, since during a tree traversal (when 25019baf839SRobert Olsson searching for a leaf - unless we are doing an insertion) we will completely 25119baf839SRobert Olsson ignore all skipped bits we encounter. Thus we need to verify, at the end of 25219baf839SRobert Olsson a potentially successful search, that we have indeed been walking the 25319baf839SRobert Olsson correct key path. 25419baf839SRobert Olsson 25519baf839SRobert Olsson Note that we can never "miss" the correct key in the tree if present by 25619baf839SRobert Olsson following the wrong path. Path compression ensures that segments of the key 25719baf839SRobert Olsson that are the same for all keys with a given prefix are skipped, but the 25819baf839SRobert Olsson skipped part *is* identical for each node in the subtrie below the skipped 25919baf839SRobert Olsson bit! trie_insert() in this implementation takes care of that - note the 26019baf839SRobert Olsson call to tkey_sub_equals() in trie_insert(). 26119baf839SRobert Olsson 26219baf839SRobert Olsson if n is an internal node - a 'tnode' here, the various parts of its key 26319baf839SRobert Olsson have many different meanings. 26419baf839SRobert Olsson 26519baf839SRobert Olsson Example: 26619baf839SRobert Olsson _________________________________________________________________ 26719baf839SRobert Olsson | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 26819baf839SRobert Olsson ----------------------------------------------------------------- 26919baf839SRobert Olsson 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 27019baf839SRobert Olsson 27119baf839SRobert Olsson _________________________________________________________________ 27219baf839SRobert Olsson | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 27319baf839SRobert Olsson ----------------------------------------------------------------- 27419baf839SRobert Olsson 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 27519baf839SRobert Olsson 27619baf839SRobert Olsson tp->pos = 7 27719baf839SRobert Olsson tp->bits = 3 27819baf839SRobert Olsson n->pos = 15 27919baf839SRobert Olsson n->bits = 4 28019baf839SRobert Olsson 28119baf839SRobert Olsson First, let's just ignore the bits that come before the parent tp, that is 28219baf839SRobert Olsson the bits from 0 to (tp->pos-1). They are *known* but at this point we do 28319baf839SRobert Olsson not use them for anything. 28419baf839SRobert Olsson 28519baf839SRobert Olsson The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 28619baf839SRobert Olsson index into the parent's child array. That is, they will be used to find 28719baf839SRobert Olsson 'n' among tp's children. 28819baf839SRobert Olsson 28919baf839SRobert Olsson The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 29019baf839SRobert Olsson for the node n. 29119baf839SRobert Olsson 29219baf839SRobert Olsson All the bits we have seen so far are significant to the node n. The rest 29319baf839SRobert Olsson of the bits are really not needed or indeed known in n->key. 29419baf839SRobert Olsson 29519baf839SRobert Olsson The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 29619baf839SRobert Olsson n's child array, and will of course be different for each child. 29719baf839SRobert Olsson 298c877efb2SStephen Hemminger 29919baf839SRobert Olsson The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 30019baf839SRobert Olsson at this point. 30119baf839SRobert Olsson 30219baf839SRobert Olsson */ 30319baf839SRobert Olsson 3040c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn) 30519baf839SRobert Olsson { 3060c7770c7SStephen Hemminger WARN_ON(tn && tn->pos+tn->bits > 32); 30719baf839SRobert Olsson } 30819baf839SRobert Olsson 309f5026fabSDenis V. Lunev static const int halve_threshold = 25; 310f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 311f5026fabSDenis V. Lunev static const int halve_threshold_root = 8; 312f5026fabSDenis V. Lunev static const int inflate_threshold_root = 15; 31319baf839SRobert Olsson 3142373ce1cSRobert Olsson 3152373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3162373ce1cSRobert Olsson { 3172373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3182373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3192373ce1cSRobert Olsson } 3202373ce1cSRobert Olsson 3212373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3222373ce1cSRobert Olsson { 3232373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3242373ce1cSRobert Olsson } 3252373ce1cSRobert Olsson 3262373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head) 3272373ce1cSRobert Olsson { 3282373ce1cSRobert Olsson kfree(container_of(head, struct leaf, rcu)); 3292373ce1cSRobert Olsson } 3302373ce1cSRobert Olsson 3312373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head) 3322373ce1cSRobert Olsson { 3332373ce1cSRobert Olsson kfree(container_of(head, struct leaf_info, rcu)); 3342373ce1cSRobert Olsson } 3352373ce1cSRobert Olsson 3362373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 3372373ce1cSRobert Olsson { 3382373ce1cSRobert Olsson call_rcu(&leaf->rcu, __leaf_info_free_rcu); 3392373ce1cSRobert Olsson } 3402373ce1cSRobert Olsson 3418d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size) 3422373ce1cSRobert Olsson { 3432373ce1cSRobert Olsson struct page *pages; 3442373ce1cSRobert Olsson 3452373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3468d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 3472373ce1cSRobert Olsson 3482373ce1cSRobert Olsson pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 3492373ce1cSRobert Olsson if (!pages) 3502373ce1cSRobert Olsson return NULL; 3512373ce1cSRobert Olsson 3522373ce1cSRobert Olsson return page_address(pages); 3532373ce1cSRobert Olsson } 3542373ce1cSRobert Olsson 3552373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head) 3562373ce1cSRobert Olsson { 3572373ce1cSRobert Olsson struct tnode *tn = container_of(head, struct tnode, rcu); 3588d965444SEric Dumazet size_t size = sizeof(struct tnode) + 3598d965444SEric Dumazet (sizeof(struct node *) << tn->bits); 3602373ce1cSRobert Olsson 3612373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3622373ce1cSRobert Olsson kfree(tn); 3632373ce1cSRobert Olsson else 3642373ce1cSRobert Olsson free_pages((unsigned long)tn, get_order(size)); 3652373ce1cSRobert Olsson } 3662373ce1cSRobert Olsson 3672373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn) 3682373ce1cSRobert Olsson { 369550e29bcSRobert Olsson if (IS_LEAF(tn)) { 370550e29bcSRobert Olsson struct leaf *l = (struct leaf *) tn; 371550e29bcSRobert Olsson call_rcu_bh(&l->rcu, __leaf_free_rcu); 372132adf54SStephen Hemminger } else 3732373ce1cSRobert Olsson call_rcu(&tn->rcu, __tnode_free_rcu); 3742373ce1cSRobert Olsson } 3752373ce1cSRobert Olsson 37619baf839SRobert Olsson static struct leaf *leaf_new(void) 37719baf839SRobert Olsson { 37819baf839SRobert Olsson struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 37919baf839SRobert Olsson if (l) { 3802373ce1cSRobert Olsson l->parent = T_LEAF; 38119baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 38219baf839SRobert Olsson } 38319baf839SRobert Olsson return l; 38419baf839SRobert Olsson } 38519baf839SRobert Olsson 38619baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 38719baf839SRobert Olsson { 38819baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 3892373ce1cSRobert Olsson if (li) { 39019baf839SRobert Olsson li->plen = plen; 39119baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 3922373ce1cSRobert Olsson } 39319baf839SRobert Olsson return li; 39419baf839SRobert Olsson } 39519baf839SRobert Olsson 39619baf839SRobert Olsson static struct tnode* tnode_new(t_key key, int pos, int bits) 39719baf839SRobert Olsson { 3988d965444SEric Dumazet size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 399f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 40019baf839SRobert Olsson 40119baf839SRobert Olsson if (tn) { 4022373ce1cSRobert Olsson tn->parent = T_TNODE; 40319baf839SRobert Olsson tn->pos = pos; 40419baf839SRobert Olsson tn->bits = bits; 40519baf839SRobert Olsson tn->key = key; 40619baf839SRobert Olsson tn->full_children = 0; 40719baf839SRobert Olsson tn->empty_children = 1<<bits; 40819baf839SRobert Olsson } 409c877efb2SStephen Hemminger 4108d965444SEric Dumazet pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 4118d965444SEric Dumazet (unsigned long) (sizeof(struct node) << bits)); 41219baf839SRobert Olsson return tn; 41319baf839SRobert Olsson } 41419baf839SRobert Olsson 41519baf839SRobert Olsson /* 41619baf839SRobert Olsson * Check whether a tnode 'n' is "full", i.e. it is an internal node 41719baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 41819baf839SRobert Olsson */ 41919baf839SRobert Olsson 420bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n) 42119baf839SRobert Olsson { 42219baf839SRobert Olsson if (n == NULL || IS_LEAF(n)) 42319baf839SRobert Olsson return 0; 42419baf839SRobert Olsson 42519baf839SRobert Olsson return ((struct tnode *) n)->pos == tn->pos + tn->bits; 42619baf839SRobert Olsson } 42719baf839SRobert Olsson 42819baf839SRobert Olsson static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 42919baf839SRobert Olsson { 43019baf839SRobert Olsson tnode_put_child_reorg(tn, i, n, -1); 43119baf839SRobert Olsson } 43219baf839SRobert Olsson 43319baf839SRobert Olsson /* 43419baf839SRobert Olsson * Add a child at position i overwriting the old value. 43519baf839SRobert Olsson * Update the value of full_children and empty_children. 43619baf839SRobert Olsson */ 43719baf839SRobert Olsson 43819baf839SRobert Olsson static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 43919baf839SRobert Olsson { 4402373ce1cSRobert Olsson struct node *chi = tn->child[i]; 44119baf839SRobert Olsson int isfull; 44219baf839SRobert Olsson 4430c7770c7SStephen Hemminger BUG_ON(i >= 1<<tn->bits); 4440c7770c7SStephen Hemminger 44519baf839SRobert Olsson 44619baf839SRobert Olsson /* update emptyChildren */ 44719baf839SRobert Olsson if (n == NULL && chi != NULL) 44819baf839SRobert Olsson tn->empty_children++; 44919baf839SRobert Olsson else if (n != NULL && chi == NULL) 45019baf839SRobert Olsson tn->empty_children--; 45119baf839SRobert Olsson 45219baf839SRobert Olsson /* update fullChildren */ 45319baf839SRobert Olsson if (wasfull == -1) 45419baf839SRobert Olsson wasfull = tnode_full(tn, chi); 45519baf839SRobert Olsson 45619baf839SRobert Olsson isfull = tnode_full(tn, n); 45719baf839SRobert Olsson if (wasfull && !isfull) 45819baf839SRobert Olsson tn->full_children--; 45919baf839SRobert Olsson else if (!wasfull && isfull) 46019baf839SRobert Olsson tn->full_children++; 46191b9a277SOlof Johansson 46219baf839SRobert Olsson if (n) 46306801916SStephen Hemminger node_set_parent(n, tn); 46419baf839SRobert Olsson 4652373ce1cSRobert Olsson rcu_assign_pointer(tn->child[i], n); 46619baf839SRobert Olsson } 46719baf839SRobert Olsson 46819baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn) 46919baf839SRobert Olsson { 47019baf839SRobert Olsson int i; 4712f36895aSRobert Olsson int err = 0; 4722f80b3c8SRobert Olsson struct tnode *old_tn; 473e6308be8SRobert Olsson int inflate_threshold_use; 474e6308be8SRobert Olsson int halve_threshold_use; 47505eee48cSRobert Olsson int max_resize; 47619baf839SRobert Olsson 47719baf839SRobert Olsson if (!tn) 47819baf839SRobert Olsson return NULL; 47919baf839SRobert Olsson 4800c7770c7SStephen Hemminger pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 48119baf839SRobert Olsson tn, inflate_threshold, halve_threshold); 48219baf839SRobert Olsson 48319baf839SRobert Olsson /* No children */ 48419baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn)) { 48519baf839SRobert Olsson tnode_free(tn); 48619baf839SRobert Olsson return NULL; 48719baf839SRobert Olsson } 48819baf839SRobert Olsson /* One child */ 48919baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 49019baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 49191b9a277SOlof Johansson struct node *n; 49219baf839SRobert Olsson 49391b9a277SOlof Johansson n = tn->child[i]; 4942373ce1cSRobert Olsson if (!n) 49591b9a277SOlof Johansson continue; 49619baf839SRobert Olsson 49719baf839SRobert Olsson /* compress one level */ 49806801916SStephen Hemminger node_set_parent(n, NULL); 49919baf839SRobert Olsson tnode_free(tn); 50019baf839SRobert Olsson return n; 50119baf839SRobert Olsson } 50219baf839SRobert Olsson /* 50319baf839SRobert Olsson * Double as long as the resulting node has a number of 50419baf839SRobert Olsson * nonempty nodes that are above the threshold. 50519baf839SRobert Olsson */ 50619baf839SRobert Olsson 50719baf839SRobert Olsson /* 50819baf839SRobert Olsson * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 50919baf839SRobert Olsson * the Helsinki University of Technology and Matti Tikkanen of Nokia 51019baf839SRobert Olsson * Telecommunications, page 6: 51119baf839SRobert Olsson * "A node is doubled if the ratio of non-empty children to all 51219baf839SRobert Olsson * children in the *doubled* node is at least 'high'." 51319baf839SRobert Olsson * 51419baf839SRobert Olsson * 'high' in this instance is the variable 'inflate_threshold'. It 51519baf839SRobert Olsson * is expressed as a percentage, so we multiply it with 51619baf839SRobert Olsson * tnode_child_length() and instead of multiplying by 2 (since the 51719baf839SRobert Olsson * child array will be doubled by inflate()) and multiplying 51819baf839SRobert Olsson * the left-hand side by 100 (to handle the percentage thing) we 51919baf839SRobert Olsson * multiply the left-hand side by 50. 52019baf839SRobert Olsson * 52119baf839SRobert Olsson * The left-hand side may look a bit weird: tnode_child_length(tn) 52219baf839SRobert Olsson * - tn->empty_children is of course the number of non-null children 52319baf839SRobert Olsson * in the current node. tn->full_children is the number of "full" 52419baf839SRobert Olsson * children, that is non-null tnodes with a skip value of 0. 52519baf839SRobert Olsson * All of those will be doubled in the resulting inflated tnode, so 52619baf839SRobert Olsson * we just count them one extra time here. 52719baf839SRobert Olsson * 52819baf839SRobert Olsson * A clearer way to write this would be: 52919baf839SRobert Olsson * 53019baf839SRobert Olsson * to_be_doubled = tn->full_children; 53119baf839SRobert Olsson * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 53219baf839SRobert Olsson * tn->full_children; 53319baf839SRobert Olsson * 53419baf839SRobert Olsson * new_child_length = tnode_child_length(tn) * 2; 53519baf839SRobert Olsson * 53619baf839SRobert Olsson * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 53719baf839SRobert Olsson * new_child_length; 53819baf839SRobert Olsson * if (new_fill_factor >= inflate_threshold) 53919baf839SRobert Olsson * 54019baf839SRobert Olsson * ...and so on, tho it would mess up the while () loop. 54119baf839SRobert Olsson * 54219baf839SRobert Olsson * anyway, 54319baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 54419baf839SRobert Olsson * inflate_threshold 54519baf839SRobert Olsson * 54619baf839SRobert Olsson * avoid a division: 54719baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 54819baf839SRobert Olsson * inflate_threshold * new_child_length 54919baf839SRobert Olsson * 55019baf839SRobert Olsson * expand not_to_be_doubled and to_be_doubled, and shorten: 55119baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 55219baf839SRobert Olsson * tn->full_children) >= inflate_threshold * new_child_length 55319baf839SRobert Olsson * 55419baf839SRobert Olsson * expand new_child_length: 55519baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 55619baf839SRobert Olsson * tn->full_children) >= 55719baf839SRobert Olsson * inflate_threshold * tnode_child_length(tn) * 2 55819baf839SRobert Olsson * 55919baf839SRobert Olsson * shorten again: 56019baf839SRobert Olsson * 50 * (tn->full_children + tnode_child_length(tn) - 56119baf839SRobert Olsson * tn->empty_children) >= inflate_threshold * 56219baf839SRobert Olsson * tnode_child_length(tn) 56319baf839SRobert Olsson * 56419baf839SRobert Olsson */ 56519baf839SRobert Olsson 56619baf839SRobert Olsson check_tnode(tn); 56719baf839SRobert Olsson 568e6308be8SRobert Olsson /* Keep root node larger */ 569e6308be8SRobert Olsson 570e6308be8SRobert Olsson if (!tn->parent) 571e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold_root; 572e6308be8SRobert Olsson else 573e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold; 574e6308be8SRobert Olsson 5752f36895aSRobert Olsson err = 0; 57605eee48cSRobert Olsson max_resize = 10; 57705eee48cSRobert Olsson while ((tn->full_children > 0 && max_resize-- && 57819baf839SRobert Olsson 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 579e6308be8SRobert Olsson inflate_threshold_use * tnode_child_length(tn))) { 58019baf839SRobert Olsson 5812f80b3c8SRobert Olsson old_tn = tn; 5822f80b3c8SRobert Olsson tn = inflate(t, tn); 5832f80b3c8SRobert Olsson if (IS_ERR(tn)) { 5842f80b3c8SRobert Olsson tn = old_tn; 5852f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 5862f36895aSRobert Olsson t->stats.resize_node_skipped++; 5872f36895aSRobert Olsson #endif 5882f36895aSRobert Olsson break; 5892f36895aSRobert Olsson } 59019baf839SRobert Olsson } 59119baf839SRobert Olsson 59205eee48cSRobert Olsson if (max_resize < 0) { 59305eee48cSRobert Olsson if (!tn->parent) 59405eee48cSRobert Olsson printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n", 59505eee48cSRobert Olsson inflate_threshold_root, tn->bits); 59605eee48cSRobert Olsson else 59705eee48cSRobert Olsson printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n", 59805eee48cSRobert Olsson inflate_threshold, tn->bits); 59905eee48cSRobert Olsson } 60005eee48cSRobert Olsson 60119baf839SRobert Olsson check_tnode(tn); 60219baf839SRobert Olsson 60319baf839SRobert Olsson /* 60419baf839SRobert Olsson * Halve as long as the number of empty children in this 60519baf839SRobert Olsson * node is above threshold. 60619baf839SRobert Olsson */ 6072f36895aSRobert Olsson 608e6308be8SRobert Olsson 609e6308be8SRobert Olsson /* Keep root node larger */ 610e6308be8SRobert Olsson 611e6308be8SRobert Olsson if (!tn->parent) 612e6308be8SRobert Olsson halve_threshold_use = halve_threshold_root; 613e6308be8SRobert Olsson else 614e6308be8SRobert Olsson halve_threshold_use = halve_threshold; 615e6308be8SRobert Olsson 6162f36895aSRobert Olsson err = 0; 61705eee48cSRobert Olsson max_resize = 10; 61805eee48cSRobert Olsson while (tn->bits > 1 && max_resize-- && 61919baf839SRobert Olsson 100 * (tnode_child_length(tn) - tn->empty_children) < 620e6308be8SRobert Olsson halve_threshold_use * tnode_child_length(tn)) { 62119baf839SRobert Olsson 6222f80b3c8SRobert Olsson old_tn = tn; 6232f80b3c8SRobert Olsson tn = halve(t, tn); 6242f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6252f80b3c8SRobert Olsson tn = old_tn; 6262f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6272f36895aSRobert Olsson t->stats.resize_node_skipped++; 6282f36895aSRobert Olsson #endif 6292f36895aSRobert Olsson break; 6302f36895aSRobert Olsson } 6312f36895aSRobert Olsson } 6322f36895aSRobert Olsson 63305eee48cSRobert Olsson if (max_resize < 0) { 63405eee48cSRobert Olsson if (!tn->parent) 63505eee48cSRobert Olsson printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n", 63605eee48cSRobert Olsson halve_threshold_root, tn->bits); 63705eee48cSRobert Olsson else 63805eee48cSRobert Olsson printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n", 63905eee48cSRobert Olsson halve_threshold, tn->bits); 64005eee48cSRobert Olsson } 64119baf839SRobert Olsson 64219baf839SRobert Olsson /* Only one child remains */ 64319baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 64419baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 64591b9a277SOlof Johansson struct node *n; 64619baf839SRobert Olsson 64791b9a277SOlof Johansson n = tn->child[i]; 6482373ce1cSRobert Olsson if (!n) 64991b9a277SOlof Johansson continue; 65091b9a277SOlof Johansson 65191b9a277SOlof Johansson /* compress one level */ 65291b9a277SOlof Johansson 65306801916SStephen Hemminger node_set_parent(n, NULL); 65419baf839SRobert Olsson tnode_free(tn); 65519baf839SRobert Olsson return n; 65619baf839SRobert Olsson } 65719baf839SRobert Olsson 65819baf839SRobert Olsson return (struct node *) tn; 65919baf839SRobert Olsson } 66019baf839SRobert Olsson 6612f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn) 66219baf839SRobert Olsson { 66319baf839SRobert Olsson struct tnode *oldtnode = tn; 66419baf839SRobert Olsson int olen = tnode_child_length(tn); 66519baf839SRobert Olsson int i; 66619baf839SRobert Olsson 6670c7770c7SStephen Hemminger pr_debug("In inflate\n"); 66819baf839SRobert Olsson 66919baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 67019baf839SRobert Olsson 6712f80b3c8SRobert Olsson if (!tn) 6722f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 6732f36895aSRobert Olsson 6742f36895aSRobert Olsson /* 6752f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 6762f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 6772f36895aSRobert Olsson * fails. In case of failure we return the oldnode and inflate 6782f36895aSRobert Olsson * of tnode is ignored. 6792f36895aSRobert Olsson */ 6802f36895aSRobert Olsson 6812f36895aSRobert Olsson for (i = 0; i < olen; i++) { 6822f36895aSRobert Olsson struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); 6832f36895aSRobert Olsson 6842f36895aSRobert Olsson if (inode && 6852f36895aSRobert Olsson IS_TNODE(inode) && 6862f36895aSRobert Olsson inode->pos == oldtnode->pos + oldtnode->bits && 6872f36895aSRobert Olsson inode->bits > 1) { 6882f36895aSRobert Olsson struct tnode *left, *right; 689ab66b4a7SStephen Hemminger t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; 6902f36895aSRobert Olsson 6912f36895aSRobert Olsson left = tnode_new(inode->key&(~m), inode->pos + 1, 6922f36895aSRobert Olsson inode->bits - 1); 6932f80b3c8SRobert Olsson if (!left) 6942f80b3c8SRobert Olsson goto nomem; 6952f36895aSRobert Olsson 6962f36895aSRobert Olsson right = tnode_new(inode->key|m, inode->pos + 1, 6972f36895aSRobert Olsson inode->bits - 1); 6982f36895aSRobert Olsson 6992f36895aSRobert Olsson if (!right) { 7002f80b3c8SRobert Olsson tnode_free(left); 7012f80b3c8SRobert Olsson goto nomem; 7022f36895aSRobert Olsson } 7032f36895aSRobert Olsson 7042f36895aSRobert Olsson put_child(t, tn, 2*i, (struct node *) left); 7052f36895aSRobert Olsson put_child(t, tn, 2*i+1, (struct node *) right); 7062f36895aSRobert Olsson } 7072f36895aSRobert Olsson } 7082f36895aSRobert Olsson 70919baf839SRobert Olsson for (i = 0; i < olen; i++) { 710c95aaf9aSStephen Hemminger struct tnode *inode; 71119baf839SRobert Olsson struct node *node = tnode_get_child(oldtnode, i); 71291b9a277SOlof Johansson struct tnode *left, *right; 71391b9a277SOlof Johansson int size, j; 71419baf839SRobert Olsson 71519baf839SRobert Olsson /* An empty child */ 71619baf839SRobert Olsson if (node == NULL) 71719baf839SRobert Olsson continue; 71819baf839SRobert Olsson 71919baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 72019baf839SRobert Olsson 72119baf839SRobert Olsson if (IS_LEAF(node) || ((struct tnode *) node)->pos > 72219baf839SRobert Olsson tn->pos + tn->bits - 1) { 7232f36895aSRobert Olsson if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 72419baf839SRobert Olsson 1) == 0) 72519baf839SRobert Olsson put_child(t, tn, 2*i, node); 72619baf839SRobert Olsson else 72719baf839SRobert Olsson put_child(t, tn, 2*i+1, node); 72819baf839SRobert Olsson continue; 72919baf839SRobert Olsson } 73019baf839SRobert Olsson 73119baf839SRobert Olsson /* An internal node with two children */ 73219baf839SRobert Olsson inode = (struct tnode *) node; 73319baf839SRobert Olsson 73419baf839SRobert Olsson if (inode->bits == 1) { 73519baf839SRobert Olsson put_child(t, tn, 2*i, inode->child[0]); 73619baf839SRobert Olsson put_child(t, tn, 2*i+1, inode->child[1]); 73719baf839SRobert Olsson 73819baf839SRobert Olsson tnode_free(inode); 73991b9a277SOlof Johansson continue; 74019baf839SRobert Olsson } 74119baf839SRobert Olsson 74219baf839SRobert Olsson /* An internal node with more than two children */ 74319baf839SRobert Olsson 74419baf839SRobert Olsson /* We will replace this node 'inode' with two new 74519baf839SRobert Olsson * ones, 'left' and 'right', each with half of the 74619baf839SRobert Olsson * original children. The two new nodes will have 74719baf839SRobert Olsson * a position one bit further down the key and this 74819baf839SRobert Olsson * means that the "significant" part of their keys 74919baf839SRobert Olsson * (see the discussion near the top of this file) 75019baf839SRobert Olsson * will differ by one bit, which will be "0" in 75119baf839SRobert Olsson * left's key and "1" in right's key. Since we are 75219baf839SRobert Olsson * moving the key position by one step, the bit that 75319baf839SRobert Olsson * we are moving away from - the bit at position 75419baf839SRobert Olsson * (inode->pos) - is the one that will differ between 75519baf839SRobert Olsson * left and right. So... we synthesize that bit in the 75619baf839SRobert Olsson * two new keys. 75719baf839SRobert Olsson * The mask 'm' below will be a single "one" bit at 75819baf839SRobert Olsson * the position (inode->pos) 75919baf839SRobert Olsson */ 76019baf839SRobert Olsson 76119baf839SRobert Olsson /* Use the old key, but set the new significant 76219baf839SRobert Olsson * bit to zero. 76319baf839SRobert Olsson */ 7642f36895aSRobert Olsson 7652f36895aSRobert Olsson left = (struct tnode *) tnode_get_child(tn, 2*i); 7662f36895aSRobert Olsson put_child(t, tn, 2*i, NULL); 76719baf839SRobert Olsson 76891b9a277SOlof Johansson BUG_ON(!left); 76919baf839SRobert Olsson 7702f36895aSRobert Olsson right = (struct tnode *) tnode_get_child(tn, 2*i+1); 7712f36895aSRobert Olsson put_child(t, tn, 2*i+1, NULL); 77219baf839SRobert Olsson 77391b9a277SOlof Johansson BUG_ON(!right); 77419baf839SRobert Olsson 77519baf839SRobert Olsson size = tnode_child_length(left); 77619baf839SRobert Olsson for (j = 0; j < size; j++) { 77719baf839SRobert Olsson put_child(t, left, j, inode->child[j]); 77819baf839SRobert Olsson put_child(t, right, j, inode->child[j + size]); 77919baf839SRobert Olsson } 78019baf839SRobert Olsson put_child(t, tn, 2*i, resize(t, left)); 78119baf839SRobert Olsson put_child(t, tn, 2*i+1, resize(t, right)); 78219baf839SRobert Olsson 78319baf839SRobert Olsson tnode_free(inode); 78419baf839SRobert Olsson } 78519baf839SRobert Olsson tnode_free(oldtnode); 78619baf839SRobert Olsson return tn; 7872f80b3c8SRobert Olsson nomem: 7882f80b3c8SRobert Olsson { 7892f80b3c8SRobert Olsson int size = tnode_child_length(tn); 7902f80b3c8SRobert Olsson int j; 7912f80b3c8SRobert Olsson 7922f80b3c8SRobert Olsson for (j = 0; j < size; j++) 7932f80b3c8SRobert Olsson if (tn->child[j]) 7942f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 7952f80b3c8SRobert Olsson 7962f80b3c8SRobert Olsson tnode_free(tn); 7972f80b3c8SRobert Olsson 7982f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 7992f80b3c8SRobert Olsson } 80019baf839SRobert Olsson } 80119baf839SRobert Olsson 8022f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn) 80319baf839SRobert Olsson { 80419baf839SRobert Olsson struct tnode *oldtnode = tn; 80519baf839SRobert Olsson struct node *left, *right; 80619baf839SRobert Olsson int i; 80719baf839SRobert Olsson int olen = tnode_child_length(tn); 80819baf839SRobert Olsson 8090c7770c7SStephen Hemminger pr_debug("In halve\n"); 81019baf839SRobert Olsson 81119baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 81219baf839SRobert Olsson 8132f80b3c8SRobert Olsson if (!tn) 8142f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8152f36895aSRobert Olsson 8162f36895aSRobert Olsson /* 8172f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 8182f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 8192f36895aSRobert Olsson * fails. In case of failure we return the oldnode and halve 8202f36895aSRobert Olsson * of tnode is ignored. 8212f36895aSRobert Olsson */ 8222f36895aSRobert Olsson 8232f36895aSRobert Olsson for (i = 0; i < olen; i += 2) { 8242f36895aSRobert Olsson left = tnode_get_child(oldtnode, i); 8252f36895aSRobert Olsson right = tnode_get_child(oldtnode, i+1); 8262f36895aSRobert Olsson 8272f36895aSRobert Olsson /* Two nonempty children */ 8282f36895aSRobert Olsson if (left && right) { 8292f80b3c8SRobert Olsson struct tnode *newn; 8302f36895aSRobert Olsson 8312f80b3c8SRobert Olsson newn = tnode_new(left->key, tn->pos + tn->bits, 1); 8322f80b3c8SRobert Olsson 8332f80b3c8SRobert Olsson if (!newn) 8342f80b3c8SRobert Olsson goto nomem; 8352f80b3c8SRobert Olsson 8362f80b3c8SRobert Olsson put_child(t, tn, i/2, (struct node *)newn); 8372f36895aSRobert Olsson } 8382f36895aSRobert Olsson 8392f36895aSRobert Olsson } 84019baf839SRobert Olsson 84119baf839SRobert Olsson for (i = 0; i < olen; i += 2) { 84291b9a277SOlof Johansson struct tnode *newBinNode; 84391b9a277SOlof Johansson 84419baf839SRobert Olsson left = tnode_get_child(oldtnode, i); 84519baf839SRobert Olsson right = tnode_get_child(oldtnode, i+1); 84619baf839SRobert Olsson 84719baf839SRobert Olsson /* At least one of the children is empty */ 84819baf839SRobert Olsson if (left == NULL) { 84919baf839SRobert Olsson if (right == NULL) /* Both are empty */ 85019baf839SRobert Olsson continue; 85119baf839SRobert Olsson put_child(t, tn, i/2, right); 85291b9a277SOlof Johansson continue; 85391b9a277SOlof Johansson } 85491b9a277SOlof Johansson 85591b9a277SOlof Johansson if (right == NULL) { 85619baf839SRobert Olsson put_child(t, tn, i/2, left); 85791b9a277SOlof Johansson continue; 85891b9a277SOlof Johansson } 85919baf839SRobert Olsson 86019baf839SRobert Olsson /* Two nonempty children */ 86191b9a277SOlof Johansson newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 8622f36895aSRobert Olsson put_child(t, tn, i/2, NULL); 86319baf839SRobert Olsson put_child(t, newBinNode, 0, left); 86419baf839SRobert Olsson put_child(t, newBinNode, 1, right); 86519baf839SRobert Olsson put_child(t, tn, i/2, resize(t, newBinNode)); 86619baf839SRobert Olsson } 86719baf839SRobert Olsson tnode_free(oldtnode); 86819baf839SRobert Olsson return tn; 8692f80b3c8SRobert Olsson nomem: 8702f80b3c8SRobert Olsson { 8712f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8722f80b3c8SRobert Olsson int j; 8732f80b3c8SRobert Olsson 8742f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8752f80b3c8SRobert Olsson if (tn->child[j]) 8762f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8772f80b3c8SRobert Olsson 8782f80b3c8SRobert Olsson tnode_free(tn); 8792f80b3c8SRobert Olsson 8802f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8812f80b3c8SRobert Olsson } 88219baf839SRobert Olsson } 88319baf839SRobert Olsson 884772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 8852373ce1cSRobert Olsson via get_fa_head and dump */ 8862373ce1cSRobert Olsson 887772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen) 88819baf839SRobert Olsson { 889772cb712SRobert Olsson struct hlist_head *head = &l->list; 89019baf839SRobert Olsson struct hlist_node *node; 89119baf839SRobert Olsson struct leaf_info *li; 89219baf839SRobert Olsson 8932373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, head, hlist) 89419baf839SRobert Olsson if (li->plen == plen) 89519baf839SRobert Olsson return li; 89691b9a277SOlof Johansson 89719baf839SRobert Olsson return NULL; 89819baf839SRobert Olsson } 89919baf839SRobert Olsson 90019baf839SRobert Olsson static inline struct list_head * get_fa_head(struct leaf *l, int plen) 90119baf839SRobert Olsson { 902772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 90319baf839SRobert Olsson 90491b9a277SOlof Johansson if (!li) 90591b9a277SOlof Johansson return NULL; 90619baf839SRobert Olsson 90791b9a277SOlof Johansson return &li->falh; 90819baf839SRobert Olsson } 90919baf839SRobert Olsson 91019baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 91119baf839SRobert Olsson { 91219baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 91391b9a277SOlof Johansson struct hlist_node *node; 91419baf839SRobert Olsson 91591b9a277SOlof Johansson if (hlist_empty(head)) { 9162373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 91791b9a277SOlof Johansson } else { 91891b9a277SOlof Johansson hlist_for_each_entry(li, node, head, hlist) { 91919baf839SRobert Olsson if (new->plen > li->plen) 92019baf839SRobert Olsson break; 92119baf839SRobert Olsson 92219baf839SRobert Olsson last = li; 92319baf839SRobert Olsson } 92419baf839SRobert Olsson if (last) 9252373ce1cSRobert Olsson hlist_add_after_rcu(&last->hlist, &new->hlist); 92619baf839SRobert Olsson else 9272373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 92819baf839SRobert Olsson } 92919baf839SRobert Olsson } 93019baf839SRobert Olsson 9312373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 9322373ce1cSRobert Olsson 93319baf839SRobert Olsson static struct leaf * 93419baf839SRobert Olsson fib_find_node(struct trie *t, u32 key) 93519baf839SRobert Olsson { 93619baf839SRobert Olsson int pos; 93719baf839SRobert Olsson struct tnode *tn; 93819baf839SRobert Olsson struct node *n; 93919baf839SRobert Olsson 94019baf839SRobert Olsson pos = 0; 9412373ce1cSRobert Olsson n = rcu_dereference(t->trie); 94219baf839SRobert Olsson 94319baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 94419baf839SRobert Olsson tn = (struct tnode *) n; 94519baf839SRobert Olsson 94619baf839SRobert Olsson check_tnode(tn); 94719baf839SRobert Olsson 94819baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 94919baf839SRobert Olsson pos = tn->pos + tn->bits; 950b59cfbf7SEric Dumazet n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 95191b9a277SOlof Johansson } else 95219baf839SRobert Olsson break; 95319baf839SRobert Olsson } 95419baf839SRobert Olsson /* Case we have found a leaf. Compare prefixes */ 95519baf839SRobert Olsson 95691b9a277SOlof Johansson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) 95791b9a277SOlof Johansson return (struct leaf *)n; 95891b9a277SOlof Johansson 95919baf839SRobert Olsson return NULL; 96019baf839SRobert Olsson } 96119baf839SRobert Olsson 96219baf839SRobert Olsson static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 96319baf839SRobert Olsson { 96419baf839SRobert Olsson int wasfull; 96506801916SStephen Hemminger t_key cindex, key = tn->key; 96606801916SStephen Hemminger struct tnode *tp; 96719baf839SRobert Olsson 96806801916SStephen Hemminger while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 96919baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 97019baf839SRobert Olsson wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 97119baf839SRobert Olsson tn = (struct tnode *) resize (t, (struct tnode *)tn); 97219baf839SRobert Olsson tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 97319baf839SRobert Olsson 97406801916SStephen Hemminger tp = node_parent((struct node *) tn); 97506801916SStephen Hemminger if (!tp) 97619baf839SRobert Olsson break; 97706801916SStephen Hemminger tn = tp; 97819baf839SRobert Olsson } 97906801916SStephen Hemminger 98019baf839SRobert Olsson /* Handle last (top) tnode */ 98119baf839SRobert Olsson if (IS_TNODE(tn)) 98219baf839SRobert Olsson tn = (struct tnode*) resize(t, (struct tnode *)tn); 98319baf839SRobert Olsson 98419baf839SRobert Olsson return (struct node*) tn; 98519baf839SRobert Olsson } 98619baf839SRobert Olsson 9872373ce1cSRobert Olsson /* only used from updater-side */ 9882373ce1cSRobert Olsson 989fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 99019baf839SRobert Olsson { 99119baf839SRobert Olsson int pos, newpos; 99219baf839SRobert Olsson struct tnode *tp = NULL, *tn = NULL; 99319baf839SRobert Olsson struct node *n; 99419baf839SRobert Olsson struct leaf *l; 99519baf839SRobert Olsson int missbit; 99619baf839SRobert Olsson struct list_head *fa_head = NULL; 99719baf839SRobert Olsson struct leaf_info *li; 99819baf839SRobert Olsson t_key cindex; 99919baf839SRobert Olsson 100019baf839SRobert Olsson pos = 0; 100119baf839SRobert Olsson n = t->trie; 100219baf839SRobert Olsson 100319baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 100419baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 100519baf839SRobert Olsson * and we should just put our new leaf in that. 100619baf839SRobert Olsson * If we point to a T_TNODE, check if it matches our key. Note that 100719baf839SRobert Olsson * a T_TNODE might be skipping any number of bits - its 'pos' need 100819baf839SRobert Olsson * not be the parent's 'pos'+'bits'! 100919baf839SRobert Olsson * 101019baf839SRobert Olsson * If it does match the current key, get pos/bits from it, extract 101119baf839SRobert Olsson * the index from our key, push the T_TNODE and walk the tree. 101219baf839SRobert Olsson * 101319baf839SRobert Olsson * If it doesn't, we have to replace it with a new T_TNODE. 101419baf839SRobert Olsson * 101519baf839SRobert Olsson * If we point to a T_LEAF, it might or might not have the same key 101619baf839SRobert Olsson * as we do. If it does, just change the value, update the T_LEAF's 101719baf839SRobert Olsson * value, and return it. 101819baf839SRobert Olsson * If it doesn't, we need to replace it with a T_TNODE. 101919baf839SRobert Olsson */ 102019baf839SRobert Olsson 102119baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 102219baf839SRobert Olsson tn = (struct tnode *) n; 102319baf839SRobert Olsson 102419baf839SRobert Olsson check_tnode(tn); 102519baf839SRobert Olsson 102619baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 102719baf839SRobert Olsson tp = tn; 102819baf839SRobert Olsson pos = tn->pos + tn->bits; 102919baf839SRobert Olsson n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 103019baf839SRobert Olsson 103106801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 103291b9a277SOlof Johansson } else 103319baf839SRobert Olsson break; 103419baf839SRobert Olsson } 103519baf839SRobert Olsson 103619baf839SRobert Olsson /* 103719baf839SRobert Olsson * n ----> NULL, LEAF or TNODE 103819baf839SRobert Olsson * 103919baf839SRobert Olsson * tp is n's (parent) ----> NULL or TNODE 104019baf839SRobert Olsson */ 104119baf839SRobert Olsson 104291b9a277SOlof Johansson BUG_ON(tp && IS_LEAF(tp)); 104319baf839SRobert Olsson 104419baf839SRobert Olsson /* Case 1: n is a leaf. Compare prefixes */ 104519baf839SRobert Olsson 104619baf839SRobert Olsson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1047c95aaf9aSStephen Hemminger l = (struct leaf *) n; 104819baf839SRobert Olsson li = leaf_info_new(plen); 104919baf839SRobert Olsson 1050fea86ad8SStephen Hemminger if (!li) 1051fea86ad8SStephen Hemminger return NULL; 105219baf839SRobert Olsson 105319baf839SRobert Olsson fa_head = &li->falh; 105419baf839SRobert Olsson insert_leaf_info(&l->list, li); 105519baf839SRobert Olsson goto done; 105619baf839SRobert Olsson } 105719baf839SRobert Olsson l = leaf_new(); 105819baf839SRobert Olsson 1059fea86ad8SStephen Hemminger if (!l) 1060fea86ad8SStephen Hemminger return NULL; 106119baf839SRobert Olsson 106219baf839SRobert Olsson l->key = key; 106319baf839SRobert Olsson li = leaf_info_new(plen); 106419baf839SRobert Olsson 1065f835e471SRobert Olsson if (!li) { 1066f835e471SRobert Olsson tnode_free((struct tnode *) l); 1067fea86ad8SStephen Hemminger return NULL; 1068f835e471SRobert Olsson } 106919baf839SRobert Olsson 107019baf839SRobert Olsson fa_head = &li->falh; 107119baf839SRobert Olsson insert_leaf_info(&l->list, li); 107219baf839SRobert Olsson 107319baf839SRobert Olsson if (t->trie && n == NULL) { 107491b9a277SOlof Johansson /* Case 2: n is NULL, and will just insert a new leaf */ 107519baf839SRobert Olsson 107606801916SStephen Hemminger node_set_parent((struct node *)l, tp); 107719baf839SRobert Olsson 107819baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 107919baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 108091b9a277SOlof Johansson } else { 108119baf839SRobert Olsson /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 108219baf839SRobert Olsson /* 108319baf839SRobert Olsson * Add a new tnode here 108419baf839SRobert Olsson * first tnode need some special handling 108519baf839SRobert Olsson */ 108619baf839SRobert Olsson 108719baf839SRobert Olsson if (tp) 108819baf839SRobert Olsson pos = tp->pos+tp->bits; 108919baf839SRobert Olsson else 109019baf839SRobert Olsson pos = 0; 109191b9a277SOlof Johansson 109219baf839SRobert Olsson if (n) { 109319baf839SRobert Olsson newpos = tkey_mismatch(key, pos, n->key); 109419baf839SRobert Olsson tn = tnode_new(n->key, newpos, 1); 109591b9a277SOlof Johansson } else { 109619baf839SRobert Olsson newpos = 0; 109719baf839SRobert Olsson tn = tnode_new(key, newpos, 1); /* First tnode */ 109819baf839SRobert Olsson } 1099f835e471SRobert Olsson 1100f835e471SRobert Olsson if (!tn) { 1101f835e471SRobert Olsson free_leaf_info(li); 1102f835e471SRobert Olsson tnode_free((struct tnode *) l); 1103fea86ad8SStephen Hemminger return NULL; 1104f835e471SRobert Olsson } 110519baf839SRobert Olsson 110606801916SStephen Hemminger node_set_parent((struct node *)tn, tp); 110719baf839SRobert Olsson 110819baf839SRobert Olsson missbit = tkey_extract_bits(key, newpos, 1); 110919baf839SRobert Olsson put_child(t, tn, missbit, (struct node *)l); 111019baf839SRobert Olsson put_child(t, tn, 1-missbit, n); 111119baf839SRobert Olsson 111219baf839SRobert Olsson if (tp) { 111319baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 111419baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 111591b9a277SOlof Johansson } else { 11162373ce1cSRobert Olsson rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ 111719baf839SRobert Olsson tp = tn; 111819baf839SRobert Olsson } 111919baf839SRobert Olsson } 112091b9a277SOlof Johansson 112191b9a277SOlof Johansson if (tp && tp->pos + tp->bits > 32) 112278c6671aSStephen Hemminger printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 112319baf839SRobert Olsson tp, tp->pos, tp->bits, key, plen); 112491b9a277SOlof Johansson 112519baf839SRobert Olsson /* Rebalance the trie */ 11262373ce1cSRobert Olsson 11272373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1128f835e471SRobert Olsson done: 112919baf839SRobert Olsson return fa_head; 113019baf839SRobert Olsson } 113119baf839SRobert Olsson 1132d562f1f8SRobert Olsson /* 1133d562f1f8SRobert Olsson * Caller must hold RTNL. 1134d562f1f8SRobert Olsson */ 11354e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) 113619baf839SRobert Olsson { 113719baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 113819baf839SRobert Olsson struct fib_alias *fa, *new_fa; 113919baf839SRobert Olsson struct list_head *fa_head = NULL; 114019baf839SRobert Olsson struct fib_info *fi; 11414e902c57SThomas Graf int plen = cfg->fc_dst_len; 11424e902c57SThomas Graf u8 tos = cfg->fc_tos; 114319baf839SRobert Olsson u32 key, mask; 114419baf839SRobert Olsson int err; 114519baf839SRobert Olsson struct leaf *l; 114619baf839SRobert Olsson 114719baf839SRobert Olsson if (plen > 32) 114819baf839SRobert Olsson return -EINVAL; 114919baf839SRobert Olsson 11504e902c57SThomas Graf key = ntohl(cfg->fc_dst); 115119baf839SRobert Olsson 11522dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 115319baf839SRobert Olsson 115419baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 115519baf839SRobert Olsson 115619baf839SRobert Olsson if (key & ~mask) 115719baf839SRobert Olsson return -EINVAL; 115819baf839SRobert Olsson 115919baf839SRobert Olsson key = key & mask; 116019baf839SRobert Olsson 11614e902c57SThomas Graf fi = fib_create_info(cfg); 11624e902c57SThomas Graf if (IS_ERR(fi)) { 11634e902c57SThomas Graf err = PTR_ERR(fi); 116419baf839SRobert Olsson goto err; 11654e902c57SThomas Graf } 116619baf839SRobert Olsson 116719baf839SRobert Olsson l = fib_find_node(t, key); 116819baf839SRobert Olsson fa = NULL; 116919baf839SRobert Olsson 117019baf839SRobert Olsson if (l) { 117119baf839SRobert Olsson fa_head = get_fa_head(l, plen); 117219baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 117319baf839SRobert Olsson } 117419baf839SRobert Olsson 117519baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 117619baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 117719baf839SRobert Olsson * exists or to the node before which we will insert new one. 117819baf839SRobert Olsson * 117919baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 118019baf839SRobert Olsson * insert to the head of f. 118119baf839SRobert Olsson * 118219baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 118319baf839SRobert Olsson * and we need to allocate a new one of those as well. 118419baf839SRobert Olsson */ 118519baf839SRobert Olsson 118691b9a277SOlof Johansson if (fa && fa->fa_info->fib_priority == fi->fib_priority) { 118719baf839SRobert Olsson struct fib_alias *fa_orig; 118819baf839SRobert Olsson 118919baf839SRobert Olsson err = -EEXIST; 11904e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 119119baf839SRobert Olsson goto out; 119219baf839SRobert Olsson 11934e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 119419baf839SRobert Olsson struct fib_info *fi_drop; 119519baf839SRobert Olsson u8 state; 119619baf839SRobert Olsson 11976725033fSJoonwoo Park if (fi->fib_treeref > 1) 11986725033fSJoonwoo Park goto out; 11996725033fSJoonwoo Park 12002373ce1cSRobert Olsson err = -ENOBUFS; 1201e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 12022373ce1cSRobert Olsson if (new_fa == NULL) 12032373ce1cSRobert Olsson goto out; 120419baf839SRobert Olsson 120519baf839SRobert Olsson fi_drop = fa->fa_info; 12062373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12072373ce1cSRobert Olsson new_fa->fa_info = fi; 12084e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12094e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 121019baf839SRobert Olsson state = fa->fa_state; 12112373ce1cSRobert Olsson new_fa->fa_state &= ~FA_S_ACCESSED; 121219baf839SRobert Olsson 12132373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12142373ce1cSRobert Olsson alias_free_mem_rcu(fa); 121519baf839SRobert Olsson 121619baf839SRobert Olsson fib_release_info(fi_drop); 121719baf839SRobert Olsson if (state & FA_S_ACCESSED) 121819baf839SRobert Olsson rt_cache_flush(-1); 1219b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1220b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 122119baf839SRobert Olsson 122219baf839SRobert Olsson goto succeeded; 122319baf839SRobert Olsson } 122419baf839SRobert Olsson /* Error if we find a perfect match which 122519baf839SRobert Olsson * uses the same scope, type, and nexthop 122619baf839SRobert Olsson * information. 122719baf839SRobert Olsson */ 122819baf839SRobert Olsson fa_orig = fa; 122919baf839SRobert Olsson list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 123019baf839SRobert Olsson if (fa->fa_tos != tos) 123119baf839SRobert Olsson break; 123219baf839SRobert Olsson if (fa->fa_info->fib_priority != fi->fib_priority) 123319baf839SRobert Olsson break; 12344e902c57SThomas Graf if (fa->fa_type == cfg->fc_type && 12354e902c57SThomas Graf fa->fa_scope == cfg->fc_scope && 123619baf839SRobert Olsson fa->fa_info == fi) { 123719baf839SRobert Olsson goto out; 123819baf839SRobert Olsson } 123919baf839SRobert Olsson } 12404e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 124119baf839SRobert Olsson fa = fa_orig; 124219baf839SRobert Olsson } 124319baf839SRobert Olsson err = -ENOENT; 12444e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 124519baf839SRobert Olsson goto out; 124619baf839SRobert Olsson 124719baf839SRobert Olsson err = -ENOBUFS; 1248e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 124919baf839SRobert Olsson if (new_fa == NULL) 125019baf839SRobert Olsson goto out; 125119baf839SRobert Olsson 125219baf839SRobert Olsson new_fa->fa_info = fi; 125319baf839SRobert Olsson new_fa->fa_tos = tos; 12544e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12554e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 125619baf839SRobert Olsson new_fa->fa_state = 0; 125719baf839SRobert Olsson /* 125819baf839SRobert Olsson * Insert new entry to the list. 125919baf839SRobert Olsson */ 126019baf839SRobert Olsson 1261f835e471SRobert Olsson if (!fa_head) { 1262fea86ad8SStephen Hemminger fa_head = fib_insert_node(t, key, plen); 1263fea86ad8SStephen Hemminger if (unlikely(!fa_head)) { 1264fea86ad8SStephen Hemminger err = -ENOMEM; 1265f835e471SRobert Olsson goto out_free_new_fa; 1266f835e471SRobert Olsson } 1267fea86ad8SStephen Hemminger } 126819baf839SRobert Olsson 12692373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 12702373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 127119baf839SRobert Olsson 1272d717a9a6SStephen Hemminger t->size++; 1273d717a9a6SStephen Hemminger 127419baf839SRobert Olsson rt_cache_flush(-1); 12754e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1276b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 127719baf839SRobert Olsson succeeded: 127819baf839SRobert Olsson return 0; 1279f835e471SRobert Olsson 1280f835e471SRobert Olsson out_free_new_fa: 1281f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 128219baf839SRobert Olsson out: 128319baf839SRobert Olsson fib_release_info(fi); 128491b9a277SOlof Johansson err: 128519baf839SRobert Olsson return err; 128619baf839SRobert Olsson } 128719baf839SRobert Olsson 12882373ce1cSRobert Olsson 1289772cb712SRobert Olsson /* should be called with rcu_read_lock */ 12900c7770c7SStephen Hemminger static inline int check_leaf(struct trie *t, struct leaf *l, 12910c7770c7SStephen Hemminger t_key key, int *plen, const struct flowi *flp, 129206c74270SPatrick McHardy struct fib_result *res) 129319baf839SRobert Olsson { 129406c74270SPatrick McHardy int err, i; 1295888454c5SAl Viro __be32 mask; 129619baf839SRobert Olsson struct leaf_info *li; 129719baf839SRobert Olsson struct hlist_head *hhead = &l->list; 129819baf839SRobert Olsson struct hlist_node *node; 129919baf839SRobert Olsson 13002373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, hhead, hlist) { 130119baf839SRobert Olsson i = li->plen; 1302888454c5SAl Viro mask = inet_make_mask(i); 1303888454c5SAl Viro if (l->key != (key & ntohl(mask))) 130419baf839SRobert Olsson continue; 130519baf839SRobert Olsson 1306888454c5SAl Viro if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { 130719baf839SRobert Olsson *plen = i; 130819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 130919baf839SRobert Olsson t->stats.semantic_match_passed++; 131019baf839SRobert Olsson #endif 131106c74270SPatrick McHardy return err; 131219baf839SRobert Olsson } 131319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 131419baf839SRobert Olsson t->stats.semantic_match_miss++; 131519baf839SRobert Olsson #endif 131619baf839SRobert Olsson } 131706c74270SPatrick McHardy return 1; 131819baf839SRobert Olsson } 131919baf839SRobert Olsson 132019baf839SRobert Olsson static int 132119baf839SRobert Olsson fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 132219baf839SRobert Olsson { 132319baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 132419baf839SRobert Olsson int plen, ret = 0; 132519baf839SRobert Olsson struct node *n; 132619baf839SRobert Olsson struct tnode *pn; 132719baf839SRobert Olsson int pos, bits; 132819baf839SRobert Olsson t_key key = ntohl(flp->fl4_dst); 132919baf839SRobert Olsson int chopped_off; 133019baf839SRobert Olsson t_key cindex = 0; 133119baf839SRobert Olsson int current_prefix_length = KEYLENGTH; 133291b9a277SOlof Johansson struct tnode *cn; 133391b9a277SOlof Johansson t_key node_prefix, key_prefix, pref_mismatch; 133491b9a277SOlof Johansson int mp; 133591b9a277SOlof Johansson 13362373ce1cSRobert Olsson rcu_read_lock(); 133719baf839SRobert Olsson 13382373ce1cSRobert Olsson n = rcu_dereference(t->trie); 133919baf839SRobert Olsson if (!n) 134019baf839SRobert Olsson goto failed; 134119baf839SRobert Olsson 134219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 134319baf839SRobert Olsson t->stats.gets++; 134419baf839SRobert Olsson #endif 134519baf839SRobert Olsson 134619baf839SRobert Olsson /* Just a leaf? */ 134719baf839SRobert Olsson if (IS_LEAF(n)) { 134806c74270SPatrick McHardy if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 134919baf839SRobert Olsson goto found; 135019baf839SRobert Olsson goto failed; 135119baf839SRobert Olsson } 135219baf839SRobert Olsson pn = (struct tnode *) n; 135319baf839SRobert Olsson chopped_off = 0; 135419baf839SRobert Olsson 135519baf839SRobert Olsson while (pn) { 135619baf839SRobert Olsson pos = pn->pos; 135719baf839SRobert Olsson bits = pn->bits; 135819baf839SRobert Olsson 135919baf839SRobert Olsson if (!chopped_off) 1360ab66b4a7SStephen Hemminger cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1361ab66b4a7SStephen Hemminger pos, bits); 136219baf839SRobert Olsson 136319baf839SRobert Olsson n = tnode_get_child(pn, cindex); 136419baf839SRobert Olsson 136519baf839SRobert Olsson if (n == NULL) { 136619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 136719baf839SRobert Olsson t->stats.null_node_hit++; 136819baf839SRobert Olsson #endif 136919baf839SRobert Olsson goto backtrace; 137019baf839SRobert Olsson } 137119baf839SRobert Olsson 137291b9a277SOlof Johansson if (IS_LEAF(n)) { 137391b9a277SOlof Johansson if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 137491b9a277SOlof Johansson goto found; 137591b9a277SOlof Johansson else 137691b9a277SOlof Johansson goto backtrace; 137791b9a277SOlof Johansson } 137891b9a277SOlof Johansson 137919baf839SRobert Olsson #define HL_OPTIMIZE 138019baf839SRobert Olsson #ifdef HL_OPTIMIZE 138191b9a277SOlof Johansson cn = (struct tnode *)n; 138219baf839SRobert Olsson 138319baf839SRobert Olsson /* 138419baf839SRobert Olsson * It's a tnode, and we can do some extra checks here if we 138519baf839SRobert Olsson * like, to avoid descending into a dead-end branch. 138619baf839SRobert Olsson * This tnode is in the parent's child array at index 138719baf839SRobert Olsson * key[p_pos..p_pos+p_bits] but potentially with some bits 138819baf839SRobert Olsson * chopped off, so in reality the index may be just a 138919baf839SRobert Olsson * subprefix, padded with zero at the end. 139019baf839SRobert Olsson * We can also take a look at any skipped bits in this 139119baf839SRobert Olsson * tnode - everything up to p_pos is supposed to be ok, 139219baf839SRobert Olsson * and the non-chopped bits of the index (se previous 139319baf839SRobert Olsson * paragraph) are also guaranteed ok, but the rest is 139419baf839SRobert Olsson * considered unknown. 139519baf839SRobert Olsson * 139619baf839SRobert Olsson * The skipped bits are key[pos+bits..cn->pos]. 139719baf839SRobert Olsson */ 139819baf839SRobert Olsson 139919baf839SRobert Olsson /* If current_prefix_length < pos+bits, we are already doing 140019baf839SRobert Olsson * actual prefix matching, which means everything from 140119baf839SRobert Olsson * pos+(bits-chopped_off) onward must be zero along some 140219baf839SRobert Olsson * branch of this subtree - otherwise there is *no* valid 140319baf839SRobert Olsson * prefix present. Here we can only check the skipped 140419baf839SRobert Olsson * bits. Remember, since we have already indexed into the 140519baf839SRobert Olsson * parent's child array, we know that the bits we chopped of 140619baf839SRobert Olsson * *are* zero. 140719baf839SRobert Olsson */ 140819baf839SRobert Olsson 140919baf839SRobert Olsson /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 141019baf839SRobert Olsson 141119baf839SRobert Olsson if (current_prefix_length < pos+bits) { 141219baf839SRobert Olsson if (tkey_extract_bits(cn->key, current_prefix_length, 141319baf839SRobert Olsson cn->pos - current_prefix_length) != 0 || 141419baf839SRobert Olsson !(cn->child[0])) 141519baf839SRobert Olsson goto backtrace; 141619baf839SRobert Olsson } 141719baf839SRobert Olsson 141819baf839SRobert Olsson /* 141919baf839SRobert Olsson * If chopped_off=0, the index is fully validated and we 142019baf839SRobert Olsson * only need to look at the skipped bits for this, the new, 142119baf839SRobert Olsson * tnode. What we actually want to do is to find out if 142219baf839SRobert Olsson * these skipped bits match our key perfectly, or if we will 142319baf839SRobert Olsson * have to count on finding a matching prefix further down, 142419baf839SRobert Olsson * because if we do, we would like to have some way of 142519baf839SRobert Olsson * verifying the existence of such a prefix at this point. 142619baf839SRobert Olsson */ 142719baf839SRobert Olsson 142819baf839SRobert Olsson /* The only thing we can do at this point is to verify that 142919baf839SRobert Olsson * any such matching prefix can indeed be a prefix to our 143019baf839SRobert Olsson * key, and if the bits in the node we are inspecting that 143119baf839SRobert Olsson * do not match our key are not ZERO, this cannot be true. 143219baf839SRobert Olsson * Thus, find out where there is a mismatch (before cn->pos) 143319baf839SRobert Olsson * and verify that all the mismatching bits are zero in the 143419baf839SRobert Olsson * new tnode's key. 143519baf839SRobert Olsson */ 143619baf839SRobert Olsson 143719baf839SRobert Olsson /* Note: We aren't very concerned about the piece of the key 143819baf839SRobert Olsson * that precede pn->pos+pn->bits, since these have already been 143919baf839SRobert Olsson * checked. The bits after cn->pos aren't checked since these are 144019baf839SRobert Olsson * by definition "unknown" at this point. Thus, what we want to 144119baf839SRobert Olsson * see is if we are about to enter the "prefix matching" state, 144219baf839SRobert Olsson * and in that case verify that the skipped bits that will prevail 144319baf839SRobert Olsson * throughout this subtree are zero, as they have to be if we are 144419baf839SRobert Olsson * to find a matching prefix. 144519baf839SRobert Olsson */ 144619baf839SRobert Olsson 1447ab66b4a7SStephen Hemminger node_prefix = mask_pfx(cn->key, cn->pos); 1448ab66b4a7SStephen Hemminger key_prefix = mask_pfx(key, cn->pos); 144919baf839SRobert Olsson pref_mismatch = key_prefix^node_prefix; 145019baf839SRobert Olsson mp = 0; 145119baf839SRobert Olsson 145219baf839SRobert Olsson /* In short: If skipped bits in this node do not match the search 145319baf839SRobert Olsson * key, enter the "prefix matching" state.directly. 145419baf839SRobert Olsson */ 145519baf839SRobert Olsson if (pref_mismatch) { 145619baf839SRobert Olsson while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 145719baf839SRobert Olsson mp++; 145819baf839SRobert Olsson pref_mismatch = pref_mismatch <<1; 145919baf839SRobert Olsson } 146019baf839SRobert Olsson key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 146119baf839SRobert Olsson 146219baf839SRobert Olsson if (key_prefix != 0) 146319baf839SRobert Olsson goto backtrace; 146419baf839SRobert Olsson 146519baf839SRobert Olsson if (current_prefix_length >= cn->pos) 146619baf839SRobert Olsson current_prefix_length = mp; 146719baf839SRobert Olsson } 146819baf839SRobert Olsson #endif 146919baf839SRobert Olsson pn = (struct tnode *)n; /* Descend */ 147019baf839SRobert Olsson chopped_off = 0; 147119baf839SRobert Olsson continue; 147291b9a277SOlof Johansson 147319baf839SRobert Olsson backtrace: 147419baf839SRobert Olsson chopped_off++; 147519baf839SRobert Olsson 147619baf839SRobert Olsson /* As zero don't change the child key (cindex) */ 147791b9a277SOlof Johansson while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) 147819baf839SRobert Olsson chopped_off++; 147919baf839SRobert Olsson 148019baf839SRobert Olsson /* Decrease current_... with bits chopped off */ 148119baf839SRobert Olsson if (current_prefix_length > pn->pos + pn->bits - chopped_off) 148219baf839SRobert Olsson current_prefix_length = pn->pos + pn->bits - chopped_off; 148319baf839SRobert Olsson 148419baf839SRobert Olsson /* 148519baf839SRobert Olsson * Either we do the actual chop off according or if we have 148619baf839SRobert Olsson * chopped off all bits in this tnode walk up to our parent. 148719baf839SRobert Olsson */ 148819baf839SRobert Olsson 148991b9a277SOlof Johansson if (chopped_off <= pn->bits) { 149019baf839SRobert Olsson cindex &= ~(1 << (chopped_off-1)); 149191b9a277SOlof Johansson } else { 149206801916SStephen Hemminger struct tnode *parent = node_parent((struct node *) pn); 149306801916SStephen Hemminger if (!parent) 149419baf839SRobert Olsson goto failed; 149519baf839SRobert Olsson 149619baf839SRobert Olsson /* Get Child's index */ 149706801916SStephen Hemminger cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); 149806801916SStephen Hemminger pn = parent; 149919baf839SRobert Olsson chopped_off = 0; 150019baf839SRobert Olsson 150119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 150219baf839SRobert Olsson t->stats.backtrack++; 150319baf839SRobert Olsson #endif 150419baf839SRobert Olsson goto backtrace; 150519baf839SRobert Olsson } 150619baf839SRobert Olsson } 150719baf839SRobert Olsson failed: 150819baf839SRobert Olsson ret = 1; 150919baf839SRobert Olsson found: 15102373ce1cSRobert Olsson rcu_read_unlock(); 151119baf839SRobert Olsson return ret; 151219baf839SRobert Olsson } 151319baf839SRobert Olsson 15142373ce1cSRobert Olsson /* only called from updater side */ 151519baf839SRobert Olsson static int trie_leaf_remove(struct trie *t, t_key key) 151619baf839SRobert Olsson { 151719baf839SRobert Olsson t_key cindex; 151819baf839SRobert Olsson struct tnode *tp = NULL; 151919baf839SRobert Olsson struct node *n = t->trie; 152019baf839SRobert Olsson struct leaf *l; 152119baf839SRobert Olsson 15220c7770c7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", n); 152319baf839SRobert Olsson 152419baf839SRobert Olsson /* Note that in the case skipped bits, those bits are *not* checked! 152519baf839SRobert Olsson * When we finish this, we will have NULL or a T_LEAF, and the 152619baf839SRobert Olsson * T_LEAF may or may not match our key. 152719baf839SRobert Olsson */ 152819baf839SRobert Olsson 152919baf839SRobert Olsson while (n != NULL && IS_TNODE(n)) { 153019baf839SRobert Olsson struct tnode *tn = (struct tnode *) n; 153119baf839SRobert Olsson check_tnode(tn); 153219baf839SRobert Olsson n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); 153319baf839SRobert Olsson 153406801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 153519baf839SRobert Olsson } 153619baf839SRobert Olsson l = (struct leaf *) n; 153719baf839SRobert Olsson 153819baf839SRobert Olsson if (!n || !tkey_equals(l->key, key)) 153919baf839SRobert Olsson return 0; 154019baf839SRobert Olsson 154119baf839SRobert Olsson /* 154219baf839SRobert Olsson * Key found. 154319baf839SRobert Olsson * Remove the leaf and rebalance the tree 154419baf839SRobert Olsson */ 154519baf839SRobert Olsson 154619baf839SRobert Olsson t->size--; 154719baf839SRobert Olsson 154806801916SStephen Hemminger tp = node_parent(n); 154919baf839SRobert Olsson tnode_free((struct tnode *) n); 155019baf839SRobert Olsson 155119baf839SRobert Olsson if (tp) { 155219baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 155319baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, NULL); 15542373ce1cSRobert Olsson rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 155591b9a277SOlof Johansson } else 15562373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 155719baf839SRobert Olsson 155819baf839SRobert Olsson return 1; 155919baf839SRobert Olsson } 156019baf839SRobert Olsson 1561d562f1f8SRobert Olsson /* 1562d562f1f8SRobert Olsson * Caller must hold RTNL. 1563d562f1f8SRobert Olsson */ 15644e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) 156519baf839SRobert Olsson { 156619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 156719baf839SRobert Olsson u32 key, mask; 15684e902c57SThomas Graf int plen = cfg->fc_dst_len; 15694e902c57SThomas Graf u8 tos = cfg->fc_tos; 157019baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 157119baf839SRobert Olsson struct list_head *fa_head; 157219baf839SRobert Olsson struct leaf *l; 157391b9a277SOlof Johansson struct leaf_info *li; 157491b9a277SOlof Johansson 157519baf839SRobert Olsson if (plen > 32) 157619baf839SRobert Olsson return -EINVAL; 157719baf839SRobert Olsson 15784e902c57SThomas Graf key = ntohl(cfg->fc_dst); 157919baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 158019baf839SRobert Olsson 158119baf839SRobert Olsson if (key & ~mask) 158219baf839SRobert Olsson return -EINVAL; 158319baf839SRobert Olsson 158419baf839SRobert Olsson key = key & mask; 158519baf839SRobert Olsson l = fib_find_node(t, key); 158619baf839SRobert Olsson 158719baf839SRobert Olsson if (!l) 158819baf839SRobert Olsson return -ESRCH; 158919baf839SRobert Olsson 159019baf839SRobert Olsson fa_head = get_fa_head(l, plen); 159119baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 159219baf839SRobert Olsson 159319baf839SRobert Olsson if (!fa) 159419baf839SRobert Olsson return -ESRCH; 159519baf839SRobert Olsson 15960c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 159719baf839SRobert Olsson 159819baf839SRobert Olsson fa_to_delete = NULL; 159919baf839SRobert Olsson fa_head = fa->fa_list.prev; 16002373ce1cSRobert Olsson 160119baf839SRobert Olsson list_for_each_entry(fa, fa_head, fa_list) { 160219baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 160319baf839SRobert Olsson 160419baf839SRobert Olsson if (fa->fa_tos != tos) 160519baf839SRobert Olsson break; 160619baf839SRobert Olsson 16074e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 16084e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 16094e902c57SThomas Graf fa->fa_scope == cfg->fc_scope) && 16104e902c57SThomas Graf (!cfg->fc_protocol || 16114e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 16124e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 161319baf839SRobert Olsson fa_to_delete = fa; 161419baf839SRobert Olsson break; 161519baf839SRobert Olsson } 161619baf839SRobert Olsson } 161719baf839SRobert Olsson 161891b9a277SOlof Johansson if (!fa_to_delete) 161991b9a277SOlof Johansson return -ESRCH; 162019baf839SRobert Olsson 162119baf839SRobert Olsson fa = fa_to_delete; 16224e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1623b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 162419baf839SRobert Olsson 162519baf839SRobert Olsson l = fib_find_node(t, key); 1626772cb712SRobert Olsson li = find_leaf_info(l, plen); 162719baf839SRobert Olsson 16282373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 162919baf839SRobert Olsson 163019baf839SRobert Olsson if (list_empty(fa_head)) { 16312373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 163219baf839SRobert Olsson free_leaf_info(li); 16332373ce1cSRobert Olsson } 163419baf839SRobert Olsson 163519baf839SRobert Olsson if (hlist_empty(&l->list)) 163619baf839SRobert Olsson trie_leaf_remove(t, key); 163719baf839SRobert Olsson 163819baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 163919baf839SRobert Olsson rt_cache_flush(-1); 164019baf839SRobert Olsson 16412373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16422373ce1cSRobert Olsson alias_free_mem_rcu(fa); 164319baf839SRobert Olsson return 0; 164419baf839SRobert Olsson } 164519baf839SRobert Olsson 164619baf839SRobert Olsson static int trie_flush_list(struct trie *t, struct list_head *head) 164719baf839SRobert Olsson { 164819baf839SRobert Olsson struct fib_alias *fa, *fa_node; 164919baf839SRobert Olsson int found = 0; 165019baf839SRobert Olsson 165119baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 165219baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 165319baf839SRobert Olsson 165419baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 16552373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 16562373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16572373ce1cSRobert Olsson alias_free_mem_rcu(fa); 165819baf839SRobert Olsson found++; 165919baf839SRobert Olsson } 166019baf839SRobert Olsson } 166119baf839SRobert Olsson return found; 166219baf839SRobert Olsson } 166319baf839SRobert Olsson 166419baf839SRobert Olsson static int trie_flush_leaf(struct trie *t, struct leaf *l) 166519baf839SRobert Olsson { 166619baf839SRobert Olsson int found = 0; 166719baf839SRobert Olsson struct hlist_head *lih = &l->list; 166819baf839SRobert Olsson struct hlist_node *node, *tmp; 166919baf839SRobert Olsson struct leaf_info *li = NULL; 167019baf839SRobert Olsson 167119baf839SRobert Olsson hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 167219baf839SRobert Olsson found += trie_flush_list(t, &li->falh); 167319baf839SRobert Olsson 167419baf839SRobert Olsson if (list_empty(&li->falh)) { 16752373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 167619baf839SRobert Olsson free_leaf_info(li); 167719baf839SRobert Olsson } 167819baf839SRobert Olsson } 167919baf839SRobert Olsson return found; 168019baf839SRobert Olsson } 168119baf839SRobert Olsson 16822373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 16832373ce1cSRobert Olsson 168419baf839SRobert Olsson static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 168519baf839SRobert Olsson { 168619baf839SRobert Olsson struct node *c = (struct node *) thisleaf; 168719baf839SRobert Olsson struct tnode *p; 168819baf839SRobert Olsson int idx; 16892373ce1cSRobert Olsson struct node *trie = rcu_dereference(t->trie); 169019baf839SRobert Olsson 169119baf839SRobert Olsson if (c == NULL) { 16922373ce1cSRobert Olsson if (trie == NULL) 169319baf839SRobert Olsson return NULL; 169419baf839SRobert Olsson 16952373ce1cSRobert Olsson if (IS_LEAF(trie)) /* trie w. just a leaf */ 16962373ce1cSRobert Olsson return (struct leaf *) trie; 169719baf839SRobert Olsson 16982373ce1cSRobert Olsson p = (struct tnode*) trie; /* Start */ 169991b9a277SOlof Johansson } else 1700b59cfbf7SEric Dumazet p = node_parent_rcu(c); 1701c877efb2SStephen Hemminger 170219baf839SRobert Olsson while (p) { 170319baf839SRobert Olsson int pos, last; 170419baf839SRobert Olsson 170519baf839SRobert Olsson /* Find the next child of the parent */ 170619baf839SRobert Olsson if (c) 170719baf839SRobert Olsson pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 170819baf839SRobert Olsson else 170919baf839SRobert Olsson pos = 0; 171019baf839SRobert Olsson 171119baf839SRobert Olsson last = 1 << p->bits; 171219baf839SRobert Olsson for (idx = pos; idx < last ; idx++) { 17132373ce1cSRobert Olsson c = rcu_dereference(p->child[idx]); 17142373ce1cSRobert Olsson 17152373ce1cSRobert Olsson if (!c) 171691b9a277SOlof Johansson continue; 171719baf839SRobert Olsson 171819baf839SRobert Olsson /* Decend if tnode */ 17192373ce1cSRobert Olsson while (IS_TNODE(c)) { 17202373ce1cSRobert Olsson p = (struct tnode *) c; 172119baf839SRobert Olsson idx = 0; 172219baf839SRobert Olsson 172319baf839SRobert Olsson /* Rightmost non-NULL branch */ 172419baf839SRobert Olsson if (p && IS_TNODE(p)) 17252373ce1cSRobert Olsson while (!(c = rcu_dereference(p->child[idx])) 17262373ce1cSRobert Olsson && idx < (1<<p->bits)) idx++; 172719baf839SRobert Olsson 172819baf839SRobert Olsson /* Done with this tnode? */ 17292373ce1cSRobert Olsson if (idx >= (1 << p->bits) || !c) 173019baf839SRobert Olsson goto up; 173119baf839SRobert Olsson } 17322373ce1cSRobert Olsson return (struct leaf *) c; 173319baf839SRobert Olsson } 173419baf839SRobert Olsson up: 173519baf839SRobert Olsson /* No more children go up one step */ 173619baf839SRobert Olsson c = (struct node *) p; 1737b59cfbf7SEric Dumazet p = node_parent_rcu(c); 173819baf839SRobert Olsson } 173919baf839SRobert Olsson return NULL; /* Ready. Root of trie */ 174019baf839SRobert Olsson } 174119baf839SRobert Olsson 1742d562f1f8SRobert Olsson /* 1743d562f1f8SRobert Olsson * Caller must hold RTNL. 1744d562f1f8SRobert Olsson */ 174519baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb) 174619baf839SRobert Olsson { 174719baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 174819baf839SRobert Olsson struct leaf *ll = NULL, *l = NULL; 174919baf839SRobert Olsson int found = 0, h; 175019baf839SRobert Olsson 175119baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 175219baf839SRobert Olsson found += trie_flush_leaf(t, l); 175319baf839SRobert Olsson 175419baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 175519baf839SRobert Olsson trie_leaf_remove(t, ll->key); 175619baf839SRobert Olsson ll = l; 175719baf839SRobert Olsson } 175819baf839SRobert Olsson 175919baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 176019baf839SRobert Olsson trie_leaf_remove(t, ll->key); 176119baf839SRobert Olsson 17620c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 176319baf839SRobert Olsson return found; 176419baf839SRobert Olsson } 176519baf839SRobert Olsson 176619baf839SRobert Olsson static void 176719baf839SRobert Olsson fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 176819baf839SRobert Olsson { 176919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 177019baf839SRobert Olsson int order, last_idx; 177119baf839SRobert Olsson struct fib_info *fi = NULL; 177219baf839SRobert Olsson struct fib_info *last_resort; 177319baf839SRobert Olsson struct fib_alias *fa = NULL; 177419baf839SRobert Olsson struct list_head *fa_head; 177519baf839SRobert Olsson struct leaf *l; 177619baf839SRobert Olsson 177719baf839SRobert Olsson last_idx = -1; 177819baf839SRobert Olsson last_resort = NULL; 177919baf839SRobert Olsson order = -1; 178019baf839SRobert Olsson 17812373ce1cSRobert Olsson rcu_read_lock(); 178219baf839SRobert Olsson 178319baf839SRobert Olsson l = fib_find_node(t, 0); 178419baf839SRobert Olsson if (!l) 178519baf839SRobert Olsson goto out; 178619baf839SRobert Olsson 178719baf839SRobert Olsson fa_head = get_fa_head(l, 0); 178819baf839SRobert Olsson if (!fa_head) 178919baf839SRobert Olsson goto out; 179019baf839SRobert Olsson 179119baf839SRobert Olsson if (list_empty(fa_head)) 179219baf839SRobert Olsson goto out; 179319baf839SRobert Olsson 17942373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fa_head, fa_list) { 179519baf839SRobert Olsson struct fib_info *next_fi = fa->fa_info; 179619baf839SRobert Olsson 179719baf839SRobert Olsson if (fa->fa_scope != res->scope || 179819baf839SRobert Olsson fa->fa_type != RTN_UNICAST) 179919baf839SRobert Olsson continue; 180019baf839SRobert Olsson 180119baf839SRobert Olsson if (next_fi->fib_priority > res->fi->fib_priority) 180219baf839SRobert Olsson break; 180319baf839SRobert Olsson if (!next_fi->fib_nh[0].nh_gw || 180419baf839SRobert Olsson next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 180519baf839SRobert Olsson continue; 180619baf839SRobert Olsson fa->fa_state |= FA_S_ACCESSED; 180719baf839SRobert Olsson 180819baf839SRobert Olsson if (fi == NULL) { 180919baf839SRobert Olsson if (next_fi != res->fi) 181019baf839SRobert Olsson break; 181119baf839SRobert Olsson } else if (!fib_detect_death(fi, order, &last_resort, 1812971b893eSDenis V. Lunev &last_idx, tb->tb_default)) { 1813a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1814971b893eSDenis V. Lunev tb->tb_default = order; 181519baf839SRobert Olsson goto out; 181619baf839SRobert Olsson } 181719baf839SRobert Olsson fi = next_fi; 181819baf839SRobert Olsson order++; 181919baf839SRobert Olsson } 182019baf839SRobert Olsson if (order <= 0 || fi == NULL) { 1821971b893eSDenis V. Lunev tb->tb_default = -1; 182219baf839SRobert Olsson goto out; 182319baf839SRobert Olsson } 182419baf839SRobert Olsson 1825971b893eSDenis V. Lunev if (!fib_detect_death(fi, order, &last_resort, &last_idx, 1826971b893eSDenis V. Lunev tb->tb_default)) { 1827a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1828971b893eSDenis V. Lunev tb->tb_default = order; 182919baf839SRobert Olsson goto out; 183019baf839SRobert Olsson } 1831a2bbe682SDenis V. Lunev if (last_idx >= 0) 1832a2bbe682SDenis V. Lunev fib_result_assign(res, last_resort); 1833971b893eSDenis V. Lunev tb->tb_default = last_idx; 1834971b893eSDenis V. Lunev out: 18352373ce1cSRobert Olsson rcu_read_unlock(); 183619baf839SRobert Olsson } 183719baf839SRobert Olsson 183819baf839SRobert Olsson static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 183919baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 184019baf839SRobert Olsson { 184119baf839SRobert Olsson int i, s_i; 184219baf839SRobert Olsson struct fib_alias *fa; 184319baf839SRobert Olsson 184432ab5f80SAl Viro __be32 xkey = htonl(key); 184519baf839SRobert Olsson 18461af5a8c4SPatrick McHardy s_i = cb->args[4]; 184719baf839SRobert Olsson i = 0; 184819baf839SRobert Olsson 18492373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 18502373ce1cSRobert Olsson 18512373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 185219baf839SRobert Olsson if (i < s_i) { 185319baf839SRobert Olsson i++; 185419baf839SRobert Olsson continue; 185519baf839SRobert Olsson } 185678c6671aSStephen Hemminger BUG_ON(!fa->fa_info); 185719baf839SRobert Olsson 185819baf839SRobert Olsson if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 185919baf839SRobert Olsson cb->nlh->nlmsg_seq, 186019baf839SRobert Olsson RTM_NEWROUTE, 186119baf839SRobert Olsson tb->tb_id, 186219baf839SRobert Olsson fa->fa_type, 186319baf839SRobert Olsson fa->fa_scope, 1864be403ea1SThomas Graf xkey, 186519baf839SRobert Olsson plen, 186619baf839SRobert Olsson fa->fa_tos, 186790f66914SDavid S. Miller fa->fa_info, 0) < 0) { 18681af5a8c4SPatrick McHardy cb->args[4] = i; 186919baf839SRobert Olsson return -1; 187019baf839SRobert Olsson } 187119baf839SRobert Olsson i++; 187219baf839SRobert Olsson } 18731af5a8c4SPatrick McHardy cb->args[4] = i; 187419baf839SRobert Olsson return skb->len; 187519baf839SRobert Olsson } 187619baf839SRobert Olsson 187719baf839SRobert Olsson static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 187819baf839SRobert Olsson struct netlink_callback *cb) 187919baf839SRobert Olsson { 188019baf839SRobert Olsson int h, s_h; 188119baf839SRobert Olsson struct list_head *fa_head; 188219baf839SRobert Olsson struct leaf *l = NULL; 188391b9a277SOlof Johansson 18841af5a8c4SPatrick McHardy s_h = cb->args[3]; 188519baf839SRobert Olsson 188619baf839SRobert Olsson for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 188719baf839SRobert Olsson if (h < s_h) 188819baf839SRobert Olsson continue; 188919baf839SRobert Olsson if (h > s_h) 18901af5a8c4SPatrick McHardy memset(&cb->args[4], 0, 18911af5a8c4SPatrick McHardy sizeof(cb->args) - 4*sizeof(cb->args[0])); 189219baf839SRobert Olsson 189319baf839SRobert Olsson fa_head = get_fa_head(l, plen); 189419baf839SRobert Olsson 189519baf839SRobert Olsson if (!fa_head) 189619baf839SRobert Olsson continue; 189719baf839SRobert Olsson 189819baf839SRobert Olsson if (list_empty(fa_head)) 189919baf839SRobert Olsson continue; 190019baf839SRobert Olsson 190119baf839SRobert Olsson if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 19021af5a8c4SPatrick McHardy cb->args[3] = h; 190319baf839SRobert Olsson return -1; 190419baf839SRobert Olsson } 190519baf839SRobert Olsson } 19061af5a8c4SPatrick McHardy cb->args[3] = h; 190719baf839SRobert Olsson return skb->len; 190819baf839SRobert Olsson } 190919baf839SRobert Olsson 191019baf839SRobert Olsson static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 191119baf839SRobert Olsson { 191219baf839SRobert Olsson int m, s_m; 191319baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 191419baf839SRobert Olsson 19151af5a8c4SPatrick McHardy s_m = cb->args[2]; 191619baf839SRobert Olsson 19172373ce1cSRobert Olsson rcu_read_lock(); 191819baf839SRobert Olsson for (m = 0; m <= 32; m++) { 191919baf839SRobert Olsson if (m < s_m) 192019baf839SRobert Olsson continue; 192119baf839SRobert Olsson if (m > s_m) 19221af5a8c4SPatrick McHardy memset(&cb->args[3], 0, 19231af5a8c4SPatrick McHardy sizeof(cb->args) - 3*sizeof(cb->args[0])); 192419baf839SRobert Olsson 192519baf839SRobert Olsson if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 19261af5a8c4SPatrick McHardy cb->args[2] = m; 192719baf839SRobert Olsson goto out; 192819baf839SRobert Olsson } 192919baf839SRobert Olsson } 19302373ce1cSRobert Olsson rcu_read_unlock(); 19311af5a8c4SPatrick McHardy cb->args[2] = m; 193219baf839SRobert Olsson return skb->len; 193319baf839SRobert Olsson out: 19342373ce1cSRobert Olsson rcu_read_unlock(); 193519baf839SRobert Olsson return -1; 193619baf839SRobert Olsson } 193719baf839SRobert Olsson 19387f9b8052SStephen Hemminger void __init fib_hash_init(void) 19397f9b8052SStephen Hemminger { 19407f9b8052SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 19417f9b8052SStephen Hemminger 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 19427f9b8052SStephen Hemminger } 194319baf839SRobert Olsson 19447f9b8052SStephen Hemminger 19457f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */ 19467f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id) 194719baf839SRobert Olsson { 194819baf839SRobert Olsson struct fib_table *tb; 194919baf839SRobert Olsson struct trie *t; 195019baf839SRobert Olsson 195119baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 195219baf839SRobert Olsson GFP_KERNEL); 195319baf839SRobert Olsson if (tb == NULL) 195419baf839SRobert Olsson return NULL; 195519baf839SRobert Olsson 195619baf839SRobert Olsson tb->tb_id = id; 1957971b893eSDenis V. Lunev tb->tb_default = -1; 195819baf839SRobert Olsson tb->tb_lookup = fn_trie_lookup; 195919baf839SRobert Olsson tb->tb_insert = fn_trie_insert; 196019baf839SRobert Olsson tb->tb_delete = fn_trie_delete; 196119baf839SRobert Olsson tb->tb_flush = fn_trie_flush; 196219baf839SRobert Olsson tb->tb_select_default = fn_trie_select_default; 196319baf839SRobert Olsson tb->tb_dump = fn_trie_dump; 196419baf839SRobert Olsson 196519baf839SRobert Olsson t = (struct trie *) tb->tb_data; 1966c28a1cf4SStephen Hemminger memset(t, 0, sizeof(*t)); 196719baf839SRobert Olsson 196819baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 196978c6671aSStephen Hemminger printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); 197019baf839SRobert Olsson 197119baf839SRobert Olsson return tb; 197219baf839SRobert Olsson } 197319baf839SRobert Olsson 1974cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 1975cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 1976cb7b593cSStephen Hemminger struct fib_trie_iter { 19771c340b2fSDenis V. Lunev struct seq_net_private p; 1978877a9bffSEric W. Biederman struct trie *trie_local, *trie_main; 1979cb7b593cSStephen Hemminger struct tnode *tnode; 1980cb7b593cSStephen Hemminger struct trie *trie; 1981cb7b593cSStephen Hemminger unsigned index; 1982cb7b593cSStephen Hemminger unsigned depth; 1983cb7b593cSStephen Hemminger }; 198419baf839SRobert Olsson 1985cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 198619baf839SRobert Olsson { 1987cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 1988cb7b593cSStephen Hemminger unsigned cindex = iter->index; 1989cb7b593cSStephen Hemminger struct tnode *p; 199019baf839SRobert Olsson 19916640e697SEric W. Biederman /* A single entry routing table */ 19926640e697SEric W. Biederman if (!tn) 19936640e697SEric W. Biederman return NULL; 19946640e697SEric W. Biederman 1995cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 1996cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 1997cb7b593cSStephen Hemminger rescan: 1998cb7b593cSStephen Hemminger while (cindex < (1<<tn->bits)) { 1999b59cfbf7SEric Dumazet struct node *n = tnode_get_child_rcu(tn, cindex); 200019baf839SRobert Olsson 2001cb7b593cSStephen Hemminger if (n) { 200219baf839SRobert Olsson if (IS_LEAF(n)) { 2003cb7b593cSStephen Hemminger iter->tnode = tn; 2004cb7b593cSStephen Hemminger iter->index = cindex + 1; 200591b9a277SOlof Johansson } else { 2006cb7b593cSStephen Hemminger /* push down one level */ 2007cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2008cb7b593cSStephen Hemminger iter->index = 0; 2009cb7b593cSStephen Hemminger ++iter->depth; 201019baf839SRobert Olsson } 2011cb7b593cSStephen Hemminger return n; 201219baf839SRobert Olsson } 201319baf839SRobert Olsson 2014cb7b593cSStephen Hemminger ++cindex; 2015cb7b593cSStephen Hemminger } 2016cb7b593cSStephen Hemminger 2017cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 2018b59cfbf7SEric Dumazet p = node_parent_rcu((struct node *)tn); 2019cb7b593cSStephen Hemminger if (p) { 2020cb7b593cSStephen Hemminger cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2021cb7b593cSStephen Hemminger tn = p; 2022cb7b593cSStephen Hemminger --iter->depth; 2023cb7b593cSStephen Hemminger goto rescan; 2024cb7b593cSStephen Hemminger } 2025cb7b593cSStephen Hemminger 2026cb7b593cSStephen Hemminger /* got root? */ 2027cb7b593cSStephen Hemminger return NULL; 2028cb7b593cSStephen Hemminger } 2029cb7b593cSStephen Hemminger 2030cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2031cb7b593cSStephen Hemminger struct trie *t) 2032cb7b593cSStephen Hemminger { 20335ddf0eb2SRobert Olsson struct node *n ; 20345ddf0eb2SRobert Olsson 20355ddf0eb2SRobert Olsson if (!t) 20365ddf0eb2SRobert Olsson return NULL; 20375ddf0eb2SRobert Olsson 20385ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 20395ddf0eb2SRobert Olsson 20405ddf0eb2SRobert Olsson if (!iter) 20415ddf0eb2SRobert Olsson return NULL; 2042cb7b593cSStephen Hemminger 20436640e697SEric W. Biederman if (n) { 20446640e697SEric W. Biederman if (IS_TNODE(n)) { 2045cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2046cb7b593cSStephen Hemminger iter->trie = t; 2047cb7b593cSStephen Hemminger iter->index = 0; 20481d25cd6cSRobert Olsson iter->depth = 1; 20496640e697SEric W. Biederman } else { 20506640e697SEric W. Biederman iter->tnode = NULL; 20516640e697SEric W. Biederman iter->trie = t; 20526640e697SEric W. Biederman iter->index = 0; 20536640e697SEric W. Biederman iter->depth = 0; 20546640e697SEric W. Biederman } 2055cb7b593cSStephen Hemminger return n; 2056cb7b593cSStephen Hemminger } 2057cb7b593cSStephen Hemminger return NULL; 2058cb7b593cSStephen Hemminger } 2059cb7b593cSStephen Hemminger 2060cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 206119baf839SRobert Olsson { 20622373ce1cSRobert Olsson struct node *n; 2063cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2064cb7b593cSStephen Hemminger 2065cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 206619baf839SRobert Olsson 20672373ce1cSRobert Olsson rcu_read_lock(); 2068cb7b593cSStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; 2069cb7b593cSStephen Hemminger n = fib_trie_get_next(&iter)) { 2070cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 207119baf839SRobert Olsson s->leaves++; 2072cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2073cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2074cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 207591b9a277SOlof Johansson } else { 2076cb7b593cSStephen Hemminger const struct tnode *tn = (const struct tnode *) n; 2077cb7b593cSStephen Hemminger int i; 207819baf839SRobert Olsson 207919baf839SRobert Olsson s->tnodes++; 208006ef921dSRobert Olsson if (tn->bits < MAX_STAT_DEPTH) 208119baf839SRobert Olsson s->nodesizes[tn->bits]++; 208206ef921dSRobert Olsson 2083cb7b593cSStephen Hemminger for (i = 0; i < (1<<tn->bits); i++) 2084cb7b593cSStephen Hemminger if (!tn->child[i]) 208519baf839SRobert Olsson s->nullpointers++; 208619baf839SRobert Olsson } 208719baf839SRobert Olsson } 20882373ce1cSRobert Olsson rcu_read_unlock(); 208919baf839SRobert Olsson } 209019baf839SRobert Olsson 209119baf839SRobert Olsson /* 209219baf839SRobert Olsson * This outputs /proc/net/fib_triestats 209319baf839SRobert Olsson */ 2094cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 209519baf839SRobert Olsson { 2096cb7b593cSStephen Hemminger unsigned i, max, pointers, bytes, avdepth; 209719baf839SRobert Olsson 209819baf839SRobert Olsson if (stat->leaves) 209919baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 210019baf839SRobert Olsson else 210119baf839SRobert Olsson avdepth = 0; 210219baf839SRobert Olsson 2103187b5188SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", avdepth / 100, avdepth % 100 ); 2104cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2105cb7b593cSStephen Hemminger 2106cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2107cb7b593cSStephen Hemminger 2108cb7b593cSStephen Hemminger bytes = sizeof(struct leaf) * stat->leaves; 2109187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 211019baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 211119baf839SRobert Olsson 211206ef921dSRobert Olsson max = MAX_STAT_DEPTH; 211306ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 211419baf839SRobert Olsson max--; 211519baf839SRobert Olsson 2116cb7b593cSStephen Hemminger pointers = 0; 211719baf839SRobert Olsson for (i = 1; i <= max; i++) 211819baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2119187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 212019baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 212119baf839SRobert Olsson } 2122cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2123187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2124cb7b593cSStephen Hemminger 212519baf839SRobert Olsson bytes += sizeof(struct node *) * pointers; 2126187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2127187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 212866a2f7fdSStephen Hemminger } 212919baf839SRobert Olsson 213019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 213166a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 213266a2f7fdSStephen Hemminger const struct trie_use_stats *stats) 213366a2f7fdSStephen Hemminger { 213466a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 213566a2f7fdSStephen Hemminger seq_printf(seq,"gets = %u\n", stats->gets); 213666a2f7fdSStephen Hemminger seq_printf(seq,"backtracks = %u\n", stats->backtrack); 213766a2f7fdSStephen Hemminger seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed); 213866a2f7fdSStephen Hemminger seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss); 213966a2f7fdSStephen Hemminger seq_printf(seq,"null node hit= %u\n", stats->null_node_hit); 214066a2f7fdSStephen Hemminger seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped); 214119baf839SRobert Olsson } 214266a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 214366a2f7fdSStephen Hemminger 2144d717a9a6SStephen Hemminger static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie) 2145d717a9a6SStephen Hemminger { 2146d717a9a6SStephen Hemminger struct trie_stat stat; 2147d717a9a6SStephen Hemminger 2148d717a9a6SStephen Hemminger seq_printf(seq, "%s: %d\n", name, trie->size); 2149d717a9a6SStephen Hemminger trie_collect_stats(trie, &stat); 2150d717a9a6SStephen Hemminger trie_show_stats(seq, &stat); 2151d717a9a6SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 2152d717a9a6SStephen Hemminger trie_show_usage(seq, &trie->stats); 2153d717a9a6SStephen Hemminger #endif 2154d717a9a6SStephen Hemminger } 215519baf839SRobert Olsson 215619baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 215719baf839SRobert Olsson { 21581c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 2159877a9bffSEric W. Biederman struct fib_table *tb; 2160877a9bffSEric W. Biederman 2161d717a9a6SStephen Hemminger seq_printf(seq, 2162d717a9a6SStephen Hemminger "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 216319baf839SRobert Olsson sizeof(struct leaf), sizeof(struct tnode)); 216419baf839SRobert Olsson 2165d717a9a6SStephen Hemminger tb = fib_get_table(net, RT_TABLE_LOCAL); 2166d717a9a6SStephen Hemminger if (tb) 2167d717a9a6SStephen Hemminger fib_trie_show(seq, "Local", (struct trie *) tb->tb_data); 2168cb7b593cSStephen Hemminger 2169d717a9a6SStephen Hemminger tb = fib_get_table(net, RT_TABLE_MAIN); 2170d717a9a6SStephen Hemminger if (tb) 2171d717a9a6SStephen Hemminger fib_trie_show(seq, "Main", (struct trie *) tb->tb_data); 2172cb7b593cSStephen Hemminger 217319baf839SRobert Olsson return 0; 217419baf839SRobert Olsson } 217519baf839SRobert Olsson 217619baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 217719baf839SRobert Olsson { 21781c340b2fSDenis V. Lunev int err; 21791c340b2fSDenis V. Lunev struct net *net; 21801c340b2fSDenis V. Lunev 21811c340b2fSDenis V. Lunev net = get_proc_net(inode); 21821c340b2fSDenis V. Lunev if (net == NULL) 21831c340b2fSDenis V. Lunev return -ENXIO; 21841c340b2fSDenis V. Lunev err = single_open(file, fib_triestat_seq_show, net); 21851c340b2fSDenis V. Lunev if (err < 0) { 21861c340b2fSDenis V. Lunev put_net(net); 21871c340b2fSDenis V. Lunev return err; 21881c340b2fSDenis V. Lunev } 21891c340b2fSDenis V. Lunev return 0; 21901c340b2fSDenis V. Lunev } 21911c340b2fSDenis V. Lunev 21921c340b2fSDenis V. Lunev static int fib_triestat_seq_release(struct inode *ino, struct file *f) 21931c340b2fSDenis V. Lunev { 21941c340b2fSDenis V. Lunev struct seq_file *seq = f->private_data; 21951c340b2fSDenis V. Lunev put_net(seq->private); 21961c340b2fSDenis V. Lunev return single_release(ino, f); 219719baf839SRobert Olsson } 219819baf839SRobert Olsson 21999a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 220019baf839SRobert Olsson .owner = THIS_MODULE, 220119baf839SRobert Olsson .open = fib_triestat_seq_open, 220219baf839SRobert Olsson .read = seq_read, 220319baf839SRobert Olsson .llseek = seq_lseek, 22041c340b2fSDenis V. Lunev .release = fib_triestat_seq_release, 220519baf839SRobert Olsson }; 220619baf839SRobert Olsson 2207cb7b593cSStephen Hemminger static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2208cb7b593cSStephen Hemminger loff_t pos) 220919baf839SRobert Olsson { 2210cb7b593cSStephen Hemminger loff_t idx = 0; 2211cb7b593cSStephen Hemminger struct node *n; 2212cb7b593cSStephen Hemminger 2213877a9bffSEric W. Biederman for (n = fib_trie_get_first(iter, iter->trie_local); 2214cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2215cb7b593cSStephen Hemminger if (pos == idx) 2216cb7b593cSStephen Hemminger return n; 221719baf839SRobert Olsson } 221819baf839SRobert Olsson 2219877a9bffSEric W. Biederman for (n = fib_trie_get_first(iter, iter->trie_main); 2220cb7b593cSStephen Hemminger n; ++idx, n = fib_trie_get_next(iter)) { 2221cb7b593cSStephen Hemminger if (pos == idx) 2222cb7b593cSStephen Hemminger return n; 222319baf839SRobert Olsson } 222419baf839SRobert Olsson return NULL; 222519baf839SRobert Olsson } 222619baf839SRobert Olsson 222719baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2228c95aaf9aSStephen Hemminger __acquires(RCU) 222919baf839SRobert Olsson { 2230877a9bffSEric W. Biederman struct fib_trie_iter *iter = seq->private; 2231877a9bffSEric W. Biederman struct fib_table *tb; 2232877a9bffSEric W. Biederman 2233877a9bffSEric W. Biederman if (!iter->trie_local) { 22341c340b2fSDenis V. Lunev tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL); 2235877a9bffSEric W. Biederman if (tb) 2236877a9bffSEric W. Biederman iter->trie_local = (struct trie *) tb->tb_data; 2237877a9bffSEric W. Biederman } 2238877a9bffSEric W. Biederman if (!iter->trie_main) { 22391c340b2fSDenis V. Lunev tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); 2240877a9bffSEric W. Biederman if (tb) 2241877a9bffSEric W. Biederman iter->trie_main = (struct trie *) tb->tb_data; 2242877a9bffSEric W. Biederman } 2243cb7b593cSStephen Hemminger rcu_read_lock(); 2244cb7b593cSStephen Hemminger if (*pos == 0) 224591b9a277SOlof Johansson return SEQ_START_TOKEN; 2246877a9bffSEric W. Biederman return fib_trie_get_idx(iter, *pos - 1); 224719baf839SRobert Olsson } 224819baf839SRobert Olsson 224919baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 225019baf839SRobert Olsson { 2251cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 2252cb7b593cSStephen Hemminger void *l = v; 2253cb7b593cSStephen Hemminger 225419baf839SRobert Olsson ++*pos; 225591b9a277SOlof Johansson if (v == SEQ_START_TOKEN) 2256cb7b593cSStephen Hemminger return fib_trie_get_idx(iter, 0); 225791b9a277SOlof Johansson 2258cb7b593cSStephen Hemminger v = fib_trie_get_next(iter); 2259cb7b593cSStephen Hemminger BUG_ON(v == l); 2260cb7b593cSStephen Hemminger if (v) 2261cb7b593cSStephen Hemminger return v; 2262cb7b593cSStephen Hemminger 2263cb7b593cSStephen Hemminger /* continue scan in next trie */ 2264877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2265877a9bffSEric W. Biederman return fib_trie_get_first(iter, iter->trie_main); 2266cb7b593cSStephen Hemminger 2267cb7b593cSStephen Hemminger return NULL; 226819baf839SRobert Olsson } 226919baf839SRobert Olsson 227019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2271c95aaf9aSStephen Hemminger __releases(RCU) 227219baf839SRobert Olsson { 2273cb7b593cSStephen Hemminger rcu_read_unlock(); 227419baf839SRobert Olsson } 227519baf839SRobert Olsson 2276cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2277cb7b593cSStephen Hemminger { 2278cb7b593cSStephen Hemminger while (n-- > 0) seq_puts(seq, " "); 2279cb7b593cSStephen Hemminger } 228019baf839SRobert Olsson 228128d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2282cb7b593cSStephen Hemminger { 2283cb7b593cSStephen Hemminger switch (s) { 2284cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2285cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2286cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2287cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2288cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2289cb7b593cSStephen Hemminger default: 229028d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2291cb7b593cSStephen Hemminger return buf; 2292cb7b593cSStephen Hemminger } 2293cb7b593cSStephen Hemminger } 2294cb7b593cSStephen Hemminger 2295cb7b593cSStephen Hemminger static const char *rtn_type_names[__RTN_MAX] = { 2296cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2297cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2298cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2299cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2300cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2301cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2302cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2303cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2304cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2305cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2306cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2307cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2308cb7b593cSStephen Hemminger }; 2309cb7b593cSStephen Hemminger 231028d36e37SEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2311cb7b593cSStephen Hemminger { 2312cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2313cb7b593cSStephen Hemminger return rtn_type_names[t]; 231428d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2315cb7b593cSStephen Hemminger return buf; 2316cb7b593cSStephen Hemminger } 2317cb7b593cSStephen Hemminger 2318cb7b593cSStephen Hemminger /* Pretty print the trie */ 231919baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 232019baf839SRobert Olsson { 2321cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2322cb7b593cSStephen Hemminger struct node *n = v; 232319baf839SRobert Olsson 2324cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) 2325cb7b593cSStephen Hemminger return 0; 232619baf839SRobert Olsson 2327b59cfbf7SEric Dumazet if (!node_parent_rcu(n)) { 2328877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2329cb7b593cSStephen Hemminger seq_puts(seq, "<local>:\n"); 2330cb7b593cSStephen Hemminger else 2331cb7b593cSStephen Hemminger seq_puts(seq, "<main>:\n"); 2332cb7b593cSStephen Hemminger } 2333095b8501SRobert Olsson 2334095b8501SRobert Olsson if (IS_TNODE(n)) { 2335095b8501SRobert Olsson struct tnode *tn = (struct tnode *) n; 2336ab66b4a7SStephen Hemminger __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2337095b8501SRobert Olsson 23381d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 23391d25cd6cSRobert Olsson seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", 23401d25cd6cSRobert Olsson NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 23411d25cd6cSRobert Olsson tn->empty_children); 23421d25cd6cSRobert Olsson 2343cb7b593cSStephen Hemminger } else { 2344cb7b593cSStephen Hemminger struct leaf *l = (struct leaf *) n; 2345cb7b593cSStephen Hemminger int i; 234632ab5f80SAl Viro __be32 val = htonl(l->key); 2347cb7b593cSStephen Hemminger 2348cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2349cb7b593cSStephen Hemminger seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2350cb7b593cSStephen Hemminger for (i = 32; i >= 0; i--) { 2351772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, i); 235228d36e37SEric Dumazet 2353cb7b593cSStephen Hemminger if (li) { 2354cb7b593cSStephen Hemminger struct fib_alias *fa; 235528d36e37SEric Dumazet 2356cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 235728d36e37SEric Dumazet char buf1[32], buf2[32]; 235828d36e37SEric Dumazet 2359cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 2360cb7b593cSStephen Hemminger seq_printf(seq, " /%d %s %s", i, 236128d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 236228d36e37SEric Dumazet fa->fa_scope), 236328d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 236428d36e37SEric Dumazet fa->fa_type)); 2365cb7b593cSStephen Hemminger if (fa->fa_tos) 2366cb7b593cSStephen Hemminger seq_printf(seq, "tos =%d\n", 2367cb7b593cSStephen Hemminger fa->fa_tos); 2368cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2369cb7b593cSStephen Hemminger } 2370cb7b593cSStephen Hemminger } 2371cb7b593cSStephen Hemminger } 237219baf839SRobert Olsson } 237319baf839SRobert Olsson 237419baf839SRobert Olsson return 0; 237519baf839SRobert Olsson } 237619baf839SRobert Olsson 2377f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 237819baf839SRobert Olsson .start = fib_trie_seq_start, 237919baf839SRobert Olsson .next = fib_trie_seq_next, 238019baf839SRobert Olsson .stop = fib_trie_seq_stop, 238119baf839SRobert Olsson .show = fib_trie_seq_show, 238219baf839SRobert Olsson }; 238319baf839SRobert Olsson 238419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 238519baf839SRobert Olsson { 23861c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2387cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 238819baf839SRobert Olsson } 238919baf839SRobert Olsson 23909a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 239119baf839SRobert Olsson .owner = THIS_MODULE, 239219baf839SRobert Olsson .open = fib_trie_seq_open, 239319baf839SRobert Olsson .read = seq_read, 239419baf839SRobert Olsson .llseek = seq_lseek, 23951c340b2fSDenis V. Lunev .release = seq_release_net, 239619baf839SRobert Olsson }; 239719baf839SRobert Olsson 239832ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2399cb7b593cSStephen Hemminger { 2400cb7b593cSStephen Hemminger static unsigned type2flags[RTN_MAX + 1] = { 2401cb7b593cSStephen Hemminger [7] = RTF_REJECT, [8] = RTF_REJECT, 2402cb7b593cSStephen Hemminger }; 2403cb7b593cSStephen Hemminger unsigned flags = type2flags[type]; 2404cb7b593cSStephen Hemminger 2405cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2406cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 240732ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2408cb7b593cSStephen Hemminger flags |= RTF_HOST; 2409cb7b593cSStephen Hemminger flags |= RTF_UP; 2410cb7b593cSStephen Hemminger return flags; 2411cb7b593cSStephen Hemminger } 2412cb7b593cSStephen Hemminger 2413cb7b593cSStephen Hemminger /* 2414cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2415cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2416cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2417cb7b593cSStephen Hemminger * legacy utilities 2418cb7b593cSStephen Hemminger */ 2419cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2420cb7b593cSStephen Hemminger { 2421c9e53cbeSPatrick McHardy const struct fib_trie_iter *iter = seq->private; 2422cb7b593cSStephen Hemminger struct leaf *l = v; 2423cb7b593cSStephen Hemminger int i; 2424cb7b593cSStephen Hemminger char bf[128]; 2425cb7b593cSStephen Hemminger 2426cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2427cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2428cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2429cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2430cb7b593cSStephen Hemminger return 0; 2431cb7b593cSStephen Hemminger } 2432cb7b593cSStephen Hemminger 2433877a9bffSEric W. Biederman if (iter->trie == iter->trie_local) 2434c9e53cbeSPatrick McHardy return 0; 2435cb7b593cSStephen Hemminger if (IS_TNODE(l)) 2436cb7b593cSStephen Hemminger return 0; 2437cb7b593cSStephen Hemminger 2438cb7b593cSStephen Hemminger for (i=32; i>=0; i--) { 2439772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, i); 2440cb7b593cSStephen Hemminger struct fib_alias *fa; 244132ab5f80SAl Viro __be32 mask, prefix; 2442cb7b593cSStephen Hemminger 2443cb7b593cSStephen Hemminger if (!li) 2444cb7b593cSStephen Hemminger continue; 2445cb7b593cSStephen Hemminger 2446cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2447cb7b593cSStephen Hemminger prefix = htonl(l->key); 2448cb7b593cSStephen Hemminger 2449cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 24501371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2451cb7b593cSStephen Hemminger unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2452cb7b593cSStephen Hemminger 2453cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2454cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2455cb7b593cSStephen Hemminger continue; 2456cb7b593cSStephen Hemminger 2457cb7b593cSStephen Hemminger if (fi) 2458cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2459cb7b593cSStephen Hemminger "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2460cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2461cb7b593cSStephen Hemminger prefix, 2462cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2463cb7b593cSStephen Hemminger fi->fib_priority, 2464cb7b593cSStephen Hemminger mask, 2465cb7b593cSStephen Hemminger (fi->fib_advmss ? fi->fib_advmss + 40 : 0), 2466cb7b593cSStephen Hemminger fi->fib_window, 2467cb7b593cSStephen Hemminger fi->fib_rtt >> 3); 2468cb7b593cSStephen Hemminger else 2469cb7b593cSStephen Hemminger snprintf(bf, sizeof(bf), 2470cb7b593cSStephen Hemminger "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 2471cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2472cb7b593cSStephen Hemminger mask, 0, 0, 0); 2473cb7b593cSStephen Hemminger 2474cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", bf); 2475cb7b593cSStephen Hemminger } 2476cb7b593cSStephen Hemminger } 2477cb7b593cSStephen Hemminger 2478cb7b593cSStephen Hemminger return 0; 2479cb7b593cSStephen Hemminger } 2480cb7b593cSStephen Hemminger 2481f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 2482cb7b593cSStephen Hemminger .start = fib_trie_seq_start, 2483cb7b593cSStephen Hemminger .next = fib_trie_seq_next, 2484cb7b593cSStephen Hemminger .stop = fib_trie_seq_stop, 2485cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2486cb7b593cSStephen Hemminger }; 2487cb7b593cSStephen Hemminger 2488cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2489cb7b593cSStephen Hemminger { 24901c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 2491cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 2492cb7b593cSStephen Hemminger } 2493cb7b593cSStephen Hemminger 24949a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2495cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2496cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2497cb7b593cSStephen Hemminger .read = seq_read, 2498cb7b593cSStephen Hemminger .llseek = seq_lseek, 24991c340b2fSDenis V. Lunev .release = seq_release_net, 2500cb7b593cSStephen Hemminger }; 2501cb7b593cSStephen Hemminger 250261a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 250319baf839SRobert Olsson { 250461a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2505cb7b593cSStephen Hemminger goto out1; 2506cb7b593cSStephen Hemminger 250761a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, 250861a02653SDenis V. Lunev &fib_triestat_fops)) 2509cb7b593cSStephen Hemminger goto out2; 2510cb7b593cSStephen Hemminger 251161a02653SDenis V. Lunev if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) 2512cb7b593cSStephen Hemminger goto out3; 2513cb7b593cSStephen Hemminger 251419baf839SRobert Olsson return 0; 2515cb7b593cSStephen Hemminger 2516cb7b593cSStephen Hemminger out3: 251761a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 2518cb7b593cSStephen Hemminger out2: 251961a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 2520cb7b593cSStephen Hemminger out1: 2521cb7b593cSStephen Hemminger return -ENOMEM; 252219baf839SRobert Olsson } 252319baf839SRobert Olsson 252461a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 252519baf839SRobert Olsson { 252661a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 252761a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 252861a02653SDenis V. Lunev proc_net_remove(net, "route"); 252919baf839SRobert Olsson } 253019baf839SRobert Olsson 253119baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2532