119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1519baf839SRobert Olsson * This work is based on the LPC-trie which is originally descibed in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 1919baf839SRobert Olsson * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 5419baf839SRobert Olsson #include <asm/system.h> 551977f032SJiri Slaby #include <linux/bitops.h> 5619baf839SRobert Olsson #include <linux/types.h> 5719baf839SRobert Olsson #include <linux/kernel.h> 5819baf839SRobert Olsson #include <linux/mm.h> 5919baf839SRobert Olsson #include <linux/string.h> 6019baf839SRobert Olsson #include <linux/socket.h> 6119baf839SRobert Olsson #include <linux/sockios.h> 6219baf839SRobert Olsson #include <linux/errno.h> 6319baf839SRobert Olsson #include <linux/in.h> 6419baf839SRobert Olsson #include <linux/inet.h> 65cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6619baf839SRobert Olsson #include <linux/netdevice.h> 6719baf839SRobert Olsson #include <linux/if_arp.h> 6819baf839SRobert Olsson #include <linux/proc_fs.h> 692373ce1cSRobert Olsson #include <linux/rcupdate.h> 7019baf839SRobert Olsson #include <linux/skbuff.h> 7119baf839SRobert Olsson #include <linux/netlink.h> 7219baf839SRobert Olsson #include <linux/init.h> 7319baf839SRobert Olsson #include <linux/list.h> 74457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7519baf839SRobert Olsson #include <net/ip.h> 7619baf839SRobert Olsson #include <net/protocol.h> 7719baf839SRobert Olsson #include <net/route.h> 7819baf839SRobert Olsson #include <net/tcp.h> 7919baf839SRobert Olsson #include <net/sock.h> 8019baf839SRobert Olsson #include <net/ip_fib.h> 8119baf839SRobert Olsson #include "fib_lookup.h" 8219baf839SRobert Olsson 8306ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8419baf839SRobert Olsson 8519baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8619baf839SRobert Olsson 8719baf839SRobert Olsson typedef unsigned int t_key; 8819baf839SRobert Olsson 8919baf839SRobert Olsson #define T_TNODE 0 9019baf839SRobert Olsson #define T_LEAF 1 9119baf839SRobert Olsson #define NODE_TYPE_MASK 0x1UL 922373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) 932373ce1cSRobert Olsson 9491b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF)) 9591b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF) 9619baf839SRobert Olsson 9719baf839SRobert Olsson struct node { 9891b9a277SOlof Johansson unsigned long parent; 998d965444SEric Dumazet t_key key; 10019baf839SRobert Olsson }; 10119baf839SRobert Olsson 10219baf839SRobert Olsson struct leaf { 10391b9a277SOlof Johansson unsigned long parent; 1048d965444SEric Dumazet t_key key; 10519baf839SRobert Olsson struct hlist_head list; 1062373ce1cSRobert Olsson struct rcu_head rcu; 10719baf839SRobert Olsson }; 10819baf839SRobert Olsson 10919baf839SRobert Olsson struct leaf_info { 11019baf839SRobert Olsson struct hlist_node hlist; 1112373ce1cSRobert Olsson struct rcu_head rcu; 11219baf839SRobert Olsson int plen; 11319baf839SRobert Olsson struct list_head falh; 11419baf839SRobert Olsson }; 11519baf839SRobert Olsson 11619baf839SRobert Olsson struct tnode { 11791b9a277SOlof Johansson unsigned long parent; 1188d965444SEric Dumazet t_key key; 119112d8cfcSEric Dumazet unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 120112d8cfcSEric Dumazet unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 1218d965444SEric Dumazet unsigned int full_children; /* KEYLENGTH bits needed */ 1228d965444SEric Dumazet unsigned int empty_children; /* KEYLENGTH bits needed */ 12315be75cdSStephen Hemminger union { 1242373ce1cSRobert Olsson struct rcu_head rcu; 12515be75cdSStephen Hemminger struct work_struct work; 126e0f7cb8cSJarek Poplawski struct tnode *tnode_free; 12715be75cdSStephen Hemminger }; 12819baf839SRobert Olsson struct node *child[0]; 12919baf839SRobert Olsson }; 13019baf839SRobert Olsson 13119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13219baf839SRobert Olsson struct trie_use_stats { 13319baf839SRobert Olsson unsigned int gets; 13419baf839SRobert Olsson unsigned int backtrack; 13519baf839SRobert Olsson unsigned int semantic_match_passed; 13619baf839SRobert Olsson unsigned int semantic_match_miss; 13719baf839SRobert Olsson unsigned int null_node_hit; 1382f36895aSRobert Olsson unsigned int resize_node_skipped; 13919baf839SRobert Olsson }; 14019baf839SRobert Olsson #endif 14119baf839SRobert Olsson 14219baf839SRobert Olsson struct trie_stat { 14319baf839SRobert Olsson unsigned int totdepth; 14419baf839SRobert Olsson unsigned int maxdepth; 14519baf839SRobert Olsson unsigned int tnodes; 14619baf839SRobert Olsson unsigned int leaves; 14719baf839SRobert Olsson unsigned int nullpointers; 14893672292SStephen Hemminger unsigned int prefixes; 14906ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 15019baf839SRobert Olsson }; 15119baf839SRobert Olsson 15219baf839SRobert Olsson struct trie { 15319baf839SRobert Olsson struct node *trie; 15419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 15519baf839SRobert Olsson struct trie_use_stats stats; 15619baf839SRobert Olsson #endif 15719baf839SRobert Olsson }; 15819baf839SRobert Olsson 15919baf839SRobert Olsson static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 160a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 161a07f5f50SStephen Hemminger int wasfull); 16219baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn); 1632f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn); 1642f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn); 165e0f7cb8cSJarek Poplawski /* tnodes to free after resize(); protected by RTNL */ 166e0f7cb8cSJarek Poplawski static struct tnode *tnode_free_head; 167c3059477SJarek Poplawski static size_t tnode_free_size; 168c3059477SJarek Poplawski 169c3059477SJarek Poplawski /* 170c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 171c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 172c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 173c3059477SJarek Poplawski */ 174c3059477SJarek Poplawski static const int sync_pages = 128; 17519baf839SRobert Olsson 176e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 177bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 17819baf839SRobert Olsson 17906801916SStephen Hemminger static inline struct tnode *node_parent(struct node *node) 18006801916SStephen Hemminger { 181b59cfbf7SEric Dumazet return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 182b59cfbf7SEric Dumazet } 18306801916SStephen Hemminger 184b59cfbf7SEric Dumazet static inline struct tnode *node_parent_rcu(struct node *node) 185b59cfbf7SEric Dumazet { 186b59cfbf7SEric Dumazet struct tnode *ret = node_parent(node); 187b59cfbf7SEric Dumazet 18806801916SStephen Hemminger return rcu_dereference(ret); 18906801916SStephen Hemminger } 19006801916SStephen Hemminger 1916440cc9eSStephen Hemminger /* Same as rcu_assign_pointer 1926440cc9eSStephen Hemminger * but that macro() assumes that value is a pointer. 1936440cc9eSStephen Hemminger */ 19406801916SStephen Hemminger static inline void node_set_parent(struct node *node, struct tnode *ptr) 19506801916SStephen Hemminger { 1966440cc9eSStephen Hemminger smp_wmb(); 1976440cc9eSStephen Hemminger node->parent = (unsigned long)ptr | NODE_TYPE(node); 19806801916SStephen Hemminger } 1992373ce1cSRobert Olsson 200b59cfbf7SEric Dumazet static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 20119baf839SRobert Olsson { 202b59cfbf7SEric Dumazet BUG_ON(i >= 1U << tn->bits); 20319baf839SRobert Olsson 204b59cfbf7SEric Dumazet return tn->child[i]; 205b59cfbf7SEric Dumazet } 206b59cfbf7SEric Dumazet 207b59cfbf7SEric Dumazet static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 208b59cfbf7SEric Dumazet { 209b59cfbf7SEric Dumazet struct node *ret = tnode_get_child(tn, i); 210b59cfbf7SEric Dumazet 211b59cfbf7SEric Dumazet return rcu_dereference(ret); 21219baf839SRobert Olsson } 21319baf839SRobert Olsson 214bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn) 21519baf839SRobert Olsson { 21619baf839SRobert Olsson return 1 << tn->bits; 21719baf839SRobert Olsson } 21819baf839SRobert Olsson 219ab66b4a7SStephen Hemminger static inline t_key mask_pfx(t_key k, unsigned short l) 220ab66b4a7SStephen Hemminger { 221ab66b4a7SStephen Hemminger return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 222ab66b4a7SStephen Hemminger } 223ab66b4a7SStephen Hemminger 22419baf839SRobert Olsson static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 22519baf839SRobert Olsson { 22619baf839SRobert Olsson if (offset < KEYLENGTH) 22719baf839SRobert Olsson return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 22819baf839SRobert Olsson else 22919baf839SRobert Olsson return 0; 23019baf839SRobert Olsson } 23119baf839SRobert Olsson 23219baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b) 23319baf839SRobert Olsson { 23419baf839SRobert Olsson return a == b; 23519baf839SRobert Olsson } 23619baf839SRobert Olsson 23719baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 23819baf839SRobert Olsson { 23919baf839SRobert Olsson if (bits == 0 || offset >= KEYLENGTH) 24019baf839SRobert Olsson return 1; 24119baf839SRobert Olsson bits = bits > KEYLENGTH ? KEYLENGTH : bits; 24219baf839SRobert Olsson return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 24319baf839SRobert Olsson } 24419baf839SRobert Olsson 24519baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b) 24619baf839SRobert Olsson { 24719baf839SRobert Olsson t_key diff = a ^ b; 24819baf839SRobert Olsson int i = offset; 24919baf839SRobert Olsson 25019baf839SRobert Olsson if (!diff) 25119baf839SRobert Olsson return 0; 25219baf839SRobert Olsson while ((diff << i) >> (KEYLENGTH-1) == 0) 25319baf839SRobert Olsson i++; 25419baf839SRobert Olsson return i; 25519baf839SRobert Olsson } 25619baf839SRobert Olsson 25719baf839SRobert Olsson /* 25819baf839SRobert Olsson To understand this stuff, an understanding of keys and all their bits is 25919baf839SRobert Olsson necessary. Every node in the trie has a key associated with it, but not 26019baf839SRobert Olsson all of the bits in that key are significant. 26119baf839SRobert Olsson 26219baf839SRobert Olsson Consider a node 'n' and its parent 'tp'. 26319baf839SRobert Olsson 26419baf839SRobert Olsson If n is a leaf, every bit in its key is significant. Its presence is 265772cb712SRobert Olsson necessitated by path compression, since during a tree traversal (when 26619baf839SRobert Olsson searching for a leaf - unless we are doing an insertion) we will completely 26719baf839SRobert Olsson ignore all skipped bits we encounter. Thus we need to verify, at the end of 26819baf839SRobert Olsson a potentially successful search, that we have indeed been walking the 26919baf839SRobert Olsson correct key path. 27019baf839SRobert Olsson 27119baf839SRobert Olsson Note that we can never "miss" the correct key in the tree if present by 27219baf839SRobert Olsson following the wrong path. Path compression ensures that segments of the key 27319baf839SRobert Olsson that are the same for all keys with a given prefix are skipped, but the 27419baf839SRobert Olsson skipped part *is* identical for each node in the subtrie below the skipped 27519baf839SRobert Olsson bit! trie_insert() in this implementation takes care of that - note the 27619baf839SRobert Olsson call to tkey_sub_equals() in trie_insert(). 27719baf839SRobert Olsson 27819baf839SRobert Olsson if n is an internal node - a 'tnode' here, the various parts of its key 27919baf839SRobert Olsson have many different meanings. 28019baf839SRobert Olsson 28119baf839SRobert Olsson Example: 28219baf839SRobert Olsson _________________________________________________________________ 28319baf839SRobert Olsson | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 28419baf839SRobert Olsson ----------------------------------------------------------------- 28519baf839SRobert Olsson 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 28619baf839SRobert Olsson 28719baf839SRobert Olsson _________________________________________________________________ 28819baf839SRobert Olsson | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 28919baf839SRobert Olsson ----------------------------------------------------------------- 29019baf839SRobert Olsson 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 29119baf839SRobert Olsson 29219baf839SRobert Olsson tp->pos = 7 29319baf839SRobert Olsson tp->bits = 3 29419baf839SRobert Olsson n->pos = 15 29519baf839SRobert Olsson n->bits = 4 29619baf839SRobert Olsson 29719baf839SRobert Olsson First, let's just ignore the bits that come before the parent tp, that is 29819baf839SRobert Olsson the bits from 0 to (tp->pos-1). They are *known* but at this point we do 29919baf839SRobert Olsson not use them for anything. 30019baf839SRobert Olsson 30119baf839SRobert Olsson The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 30219baf839SRobert Olsson index into the parent's child array. That is, they will be used to find 30319baf839SRobert Olsson 'n' among tp's children. 30419baf839SRobert Olsson 30519baf839SRobert Olsson The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 30619baf839SRobert Olsson for the node n. 30719baf839SRobert Olsson 30819baf839SRobert Olsson All the bits we have seen so far are significant to the node n. The rest 30919baf839SRobert Olsson of the bits are really not needed or indeed known in n->key. 31019baf839SRobert Olsson 31119baf839SRobert Olsson The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 31219baf839SRobert Olsson n's child array, and will of course be different for each child. 31319baf839SRobert Olsson 314c877efb2SStephen Hemminger 31519baf839SRobert Olsson The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 31619baf839SRobert Olsson at this point. 31719baf839SRobert Olsson 31819baf839SRobert Olsson */ 31919baf839SRobert Olsson 3200c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn) 32119baf839SRobert Olsson { 3220c7770c7SStephen Hemminger WARN_ON(tn && tn->pos+tn->bits > 32); 32319baf839SRobert Olsson } 32419baf839SRobert Olsson 325f5026fabSDenis V. Lunev static const int halve_threshold = 25; 326f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 327345aa031SJarek Poplawski static const int halve_threshold_root = 15; 32880b71b80SJens Låås static const int inflate_threshold_root = 30; 3292373ce1cSRobert Olsson 3302373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3312373ce1cSRobert Olsson { 3322373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3332373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3342373ce1cSRobert Olsson } 3352373ce1cSRobert Olsson 3362373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3372373ce1cSRobert Olsson { 3382373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3392373ce1cSRobert Olsson } 3402373ce1cSRobert Olsson 3412373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head) 3422373ce1cSRobert Olsson { 343bc3c8c1eSStephen Hemminger struct leaf *l = container_of(head, struct leaf, rcu); 344bc3c8c1eSStephen Hemminger kmem_cache_free(trie_leaf_kmem, l); 3452373ce1cSRobert Olsson } 3462373ce1cSRobert Olsson 347387a5487SStephen Hemminger static inline void free_leaf(struct leaf *l) 348387a5487SStephen Hemminger { 349387a5487SStephen Hemminger call_rcu_bh(&l->rcu, __leaf_free_rcu); 350387a5487SStephen Hemminger } 351387a5487SStephen Hemminger 3522373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head) 3532373ce1cSRobert Olsson { 3542373ce1cSRobert Olsson kfree(container_of(head, struct leaf_info, rcu)); 3552373ce1cSRobert Olsson } 3562373ce1cSRobert Olsson 3572373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 3582373ce1cSRobert Olsson { 3592373ce1cSRobert Olsson call_rcu(&leaf->rcu, __leaf_info_free_rcu); 3602373ce1cSRobert Olsson } 3612373ce1cSRobert Olsson 3628d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size) 3632373ce1cSRobert Olsson { 3642373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3658d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 36615be75cdSStephen Hemminger else 36715be75cdSStephen Hemminger return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 36815be75cdSStephen Hemminger } 3692373ce1cSRobert Olsson 37015be75cdSStephen Hemminger static void __tnode_vfree(struct work_struct *arg) 37115be75cdSStephen Hemminger { 37215be75cdSStephen Hemminger struct tnode *tn = container_of(arg, struct tnode, work); 37315be75cdSStephen Hemminger vfree(tn); 3742373ce1cSRobert Olsson } 3752373ce1cSRobert Olsson 3762373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head) 3772373ce1cSRobert Olsson { 3782373ce1cSRobert Olsson struct tnode *tn = container_of(head, struct tnode, rcu); 3798d965444SEric Dumazet size_t size = sizeof(struct tnode) + 3808d965444SEric Dumazet (sizeof(struct node *) << tn->bits); 3812373ce1cSRobert Olsson 3822373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3832373ce1cSRobert Olsson kfree(tn); 38415be75cdSStephen Hemminger else { 38515be75cdSStephen Hemminger INIT_WORK(&tn->work, __tnode_vfree); 38615be75cdSStephen Hemminger schedule_work(&tn->work); 38715be75cdSStephen Hemminger } 3882373ce1cSRobert Olsson } 3892373ce1cSRobert Olsson 3902373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn) 3912373ce1cSRobert Olsson { 392387a5487SStephen Hemminger if (IS_LEAF(tn)) 393387a5487SStephen Hemminger free_leaf((struct leaf *) tn); 394387a5487SStephen Hemminger else 3952373ce1cSRobert Olsson call_rcu(&tn->rcu, __tnode_free_rcu); 3962373ce1cSRobert Olsson } 3972373ce1cSRobert Olsson 398e0f7cb8cSJarek Poplawski static void tnode_free_safe(struct tnode *tn) 399e0f7cb8cSJarek Poplawski { 400e0f7cb8cSJarek Poplawski BUG_ON(IS_LEAF(tn)); 401e0f7cb8cSJarek Poplawski tn->tnode_free = tnode_free_head; 402e0f7cb8cSJarek Poplawski tnode_free_head = tn; 403c3059477SJarek Poplawski tnode_free_size += sizeof(struct tnode) + 404c3059477SJarek Poplawski (sizeof(struct node *) << tn->bits); 405e0f7cb8cSJarek Poplawski } 406e0f7cb8cSJarek Poplawski 407e0f7cb8cSJarek Poplawski static void tnode_free_flush(void) 408e0f7cb8cSJarek Poplawski { 409e0f7cb8cSJarek Poplawski struct tnode *tn; 410e0f7cb8cSJarek Poplawski 411e0f7cb8cSJarek Poplawski while ((tn = tnode_free_head)) { 412e0f7cb8cSJarek Poplawski tnode_free_head = tn->tnode_free; 413e0f7cb8cSJarek Poplawski tn->tnode_free = NULL; 414e0f7cb8cSJarek Poplawski tnode_free(tn); 415e0f7cb8cSJarek Poplawski } 416c3059477SJarek Poplawski 417c3059477SJarek Poplawski if (tnode_free_size >= PAGE_SIZE * sync_pages) { 418c3059477SJarek Poplawski tnode_free_size = 0; 419c3059477SJarek Poplawski synchronize_rcu(); 420c3059477SJarek Poplawski } 421e0f7cb8cSJarek Poplawski } 422e0f7cb8cSJarek Poplawski 42319baf839SRobert Olsson static struct leaf *leaf_new(void) 42419baf839SRobert Olsson { 425bc3c8c1eSStephen Hemminger struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 42619baf839SRobert Olsson if (l) { 4272373ce1cSRobert Olsson l->parent = T_LEAF; 42819baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 42919baf839SRobert Olsson } 43019baf839SRobert Olsson return l; 43119baf839SRobert Olsson } 43219baf839SRobert Olsson 43319baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 43419baf839SRobert Olsson { 43519baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 4362373ce1cSRobert Olsson if (li) { 43719baf839SRobert Olsson li->plen = plen; 43819baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 4392373ce1cSRobert Olsson } 44019baf839SRobert Olsson return li; 44119baf839SRobert Olsson } 44219baf839SRobert Olsson 44319baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits) 44419baf839SRobert Olsson { 4458d965444SEric Dumazet size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 446f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 44719baf839SRobert Olsson 44819baf839SRobert Olsson if (tn) { 4492373ce1cSRobert Olsson tn->parent = T_TNODE; 45019baf839SRobert Olsson tn->pos = pos; 45119baf839SRobert Olsson tn->bits = bits; 45219baf839SRobert Olsson tn->key = key; 45319baf839SRobert Olsson tn->full_children = 0; 45419baf839SRobert Olsson tn->empty_children = 1<<bits; 45519baf839SRobert Olsson } 456c877efb2SStephen Hemminger 4578d965444SEric Dumazet pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 4588d965444SEric Dumazet (unsigned long) (sizeof(struct node) << bits)); 45919baf839SRobert Olsson return tn; 46019baf839SRobert Olsson } 46119baf839SRobert Olsson 46219baf839SRobert Olsson /* 46319baf839SRobert Olsson * Check whether a tnode 'n' is "full", i.e. it is an internal node 46419baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 46519baf839SRobert Olsson */ 46619baf839SRobert Olsson 467bb435b8dSStephen Hemmigner static inline int tnode_full(const struct tnode *tn, const struct node *n) 46819baf839SRobert Olsson { 46919baf839SRobert Olsson if (n == NULL || IS_LEAF(n)) 47019baf839SRobert Olsson return 0; 47119baf839SRobert Olsson 47219baf839SRobert Olsson return ((struct tnode *) n)->pos == tn->pos + tn->bits; 47319baf839SRobert Olsson } 47419baf839SRobert Olsson 475a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i, 476a07f5f50SStephen Hemminger struct node *n) 47719baf839SRobert Olsson { 47819baf839SRobert Olsson tnode_put_child_reorg(tn, i, n, -1); 47919baf839SRobert Olsson } 48019baf839SRobert Olsson 48119baf839SRobert Olsson /* 48219baf839SRobert Olsson * Add a child at position i overwriting the old value. 48319baf839SRobert Olsson * Update the value of full_children and empty_children. 48419baf839SRobert Olsson */ 48519baf839SRobert Olsson 486a07f5f50SStephen Hemminger static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 487a07f5f50SStephen Hemminger int wasfull) 48819baf839SRobert Olsson { 4892373ce1cSRobert Olsson struct node *chi = tn->child[i]; 49019baf839SRobert Olsson int isfull; 49119baf839SRobert Olsson 4920c7770c7SStephen Hemminger BUG_ON(i >= 1<<tn->bits); 4930c7770c7SStephen Hemminger 49419baf839SRobert Olsson /* update emptyChildren */ 49519baf839SRobert Olsson if (n == NULL && chi != NULL) 49619baf839SRobert Olsson tn->empty_children++; 49719baf839SRobert Olsson else if (n != NULL && chi == NULL) 49819baf839SRobert Olsson tn->empty_children--; 49919baf839SRobert Olsson 50019baf839SRobert Olsson /* update fullChildren */ 50119baf839SRobert Olsson if (wasfull == -1) 50219baf839SRobert Olsson wasfull = tnode_full(tn, chi); 50319baf839SRobert Olsson 50419baf839SRobert Olsson isfull = tnode_full(tn, n); 50519baf839SRobert Olsson if (wasfull && !isfull) 50619baf839SRobert Olsson tn->full_children--; 50719baf839SRobert Olsson else if (!wasfull && isfull) 50819baf839SRobert Olsson tn->full_children++; 50991b9a277SOlof Johansson 51019baf839SRobert Olsson if (n) 51106801916SStephen Hemminger node_set_parent(n, tn); 51219baf839SRobert Olsson 5132373ce1cSRobert Olsson rcu_assign_pointer(tn->child[i], n); 51419baf839SRobert Olsson } 51519baf839SRobert Olsson 51680b71b80SJens Låås #define MAX_WORK 10 51719baf839SRobert Olsson static struct node *resize(struct trie *t, struct tnode *tn) 51819baf839SRobert Olsson { 51919baf839SRobert Olsson int i; 5202f80b3c8SRobert Olsson struct tnode *old_tn; 521e6308be8SRobert Olsson int inflate_threshold_use; 522e6308be8SRobert Olsson int halve_threshold_use; 52380b71b80SJens Låås int max_work; 52419baf839SRobert Olsson 52519baf839SRobert Olsson if (!tn) 52619baf839SRobert Olsson return NULL; 52719baf839SRobert Olsson 5280c7770c7SStephen Hemminger pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 52919baf839SRobert Olsson tn, inflate_threshold, halve_threshold); 53019baf839SRobert Olsson 53119baf839SRobert Olsson /* No children */ 53219baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn)) { 533e0f7cb8cSJarek Poplawski tnode_free_safe(tn); 53419baf839SRobert Olsson return NULL; 53519baf839SRobert Olsson } 53619baf839SRobert Olsson /* One child */ 53719baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 53880b71b80SJens Låås goto one_child; 53919baf839SRobert Olsson /* 54019baf839SRobert Olsson * Double as long as the resulting node has a number of 54119baf839SRobert Olsson * nonempty nodes that are above the threshold. 54219baf839SRobert Olsson */ 54319baf839SRobert Olsson 54419baf839SRobert Olsson /* 54519baf839SRobert Olsson * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 54619baf839SRobert Olsson * the Helsinki University of Technology and Matti Tikkanen of Nokia 54719baf839SRobert Olsson * Telecommunications, page 6: 54819baf839SRobert Olsson * "A node is doubled if the ratio of non-empty children to all 54919baf839SRobert Olsson * children in the *doubled* node is at least 'high'." 55019baf839SRobert Olsson * 55119baf839SRobert Olsson * 'high' in this instance is the variable 'inflate_threshold'. It 55219baf839SRobert Olsson * is expressed as a percentage, so we multiply it with 55319baf839SRobert Olsson * tnode_child_length() and instead of multiplying by 2 (since the 55419baf839SRobert Olsson * child array will be doubled by inflate()) and multiplying 55519baf839SRobert Olsson * the left-hand side by 100 (to handle the percentage thing) we 55619baf839SRobert Olsson * multiply the left-hand side by 50. 55719baf839SRobert Olsson * 55819baf839SRobert Olsson * The left-hand side may look a bit weird: tnode_child_length(tn) 55919baf839SRobert Olsson * - tn->empty_children is of course the number of non-null children 56019baf839SRobert Olsson * in the current node. tn->full_children is the number of "full" 56119baf839SRobert Olsson * children, that is non-null tnodes with a skip value of 0. 56219baf839SRobert Olsson * All of those will be doubled in the resulting inflated tnode, so 56319baf839SRobert Olsson * we just count them one extra time here. 56419baf839SRobert Olsson * 56519baf839SRobert Olsson * A clearer way to write this would be: 56619baf839SRobert Olsson * 56719baf839SRobert Olsson * to_be_doubled = tn->full_children; 56819baf839SRobert Olsson * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 56919baf839SRobert Olsson * tn->full_children; 57019baf839SRobert Olsson * 57119baf839SRobert Olsson * new_child_length = tnode_child_length(tn) * 2; 57219baf839SRobert Olsson * 57319baf839SRobert Olsson * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 57419baf839SRobert Olsson * new_child_length; 57519baf839SRobert Olsson * if (new_fill_factor >= inflate_threshold) 57619baf839SRobert Olsson * 57719baf839SRobert Olsson * ...and so on, tho it would mess up the while () loop. 57819baf839SRobert Olsson * 57919baf839SRobert Olsson * anyway, 58019baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 58119baf839SRobert Olsson * inflate_threshold 58219baf839SRobert Olsson * 58319baf839SRobert Olsson * avoid a division: 58419baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 58519baf839SRobert Olsson * inflate_threshold * new_child_length 58619baf839SRobert Olsson * 58719baf839SRobert Olsson * expand not_to_be_doubled and to_be_doubled, and shorten: 58819baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 58919baf839SRobert Olsson * tn->full_children) >= inflate_threshold * new_child_length 59019baf839SRobert Olsson * 59119baf839SRobert Olsson * expand new_child_length: 59219baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 59319baf839SRobert Olsson * tn->full_children) >= 59419baf839SRobert Olsson * inflate_threshold * tnode_child_length(tn) * 2 59519baf839SRobert Olsson * 59619baf839SRobert Olsson * shorten again: 59719baf839SRobert Olsson * 50 * (tn->full_children + tnode_child_length(tn) - 59819baf839SRobert Olsson * tn->empty_children) >= inflate_threshold * 59919baf839SRobert Olsson * tnode_child_length(tn) 60019baf839SRobert Olsson * 60119baf839SRobert Olsson */ 60219baf839SRobert Olsson 60319baf839SRobert Olsson check_tnode(tn); 60419baf839SRobert Olsson 605e6308be8SRobert Olsson /* Keep root node larger */ 606e6308be8SRobert Olsson 60780b71b80SJens Låås if (!node_parent((struct node*) tn)) { 60880b71b80SJens Låås inflate_threshold_use = inflate_threshold_root; 60980b71b80SJens Låås halve_threshold_use = halve_threshold_root; 61080b71b80SJens Låås } 61180b71b80SJens Låås else { 612e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold; 61380b71b80SJens Låås halve_threshold_use = halve_threshold; 61480b71b80SJens Låås } 615e6308be8SRobert Olsson 61680b71b80SJens Låås max_work = MAX_WORK; 61780b71b80SJens Låås while ((tn->full_children > 0 && max_work-- && 618a07f5f50SStephen Hemminger 50 * (tn->full_children + tnode_child_length(tn) 619a07f5f50SStephen Hemminger - tn->empty_children) 620a07f5f50SStephen Hemminger >= inflate_threshold_use * tnode_child_length(tn))) { 62119baf839SRobert Olsson 6222f80b3c8SRobert Olsson old_tn = tn; 6232f80b3c8SRobert Olsson tn = inflate(t, tn); 624a07f5f50SStephen Hemminger 6252f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6262f80b3c8SRobert Olsson tn = old_tn; 6272f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6282f36895aSRobert Olsson t->stats.resize_node_skipped++; 6292f36895aSRobert Olsson #endif 6302f36895aSRobert Olsson break; 6312f36895aSRobert Olsson } 63219baf839SRobert Olsson } 63319baf839SRobert Olsson 63419baf839SRobert Olsson check_tnode(tn); 63519baf839SRobert Olsson 63680b71b80SJens Låås /* Return if at least one inflate is run */ 63780b71b80SJens Låås if( max_work != MAX_WORK) 63880b71b80SJens Låås return (struct node *) tn; 63980b71b80SJens Låås 64019baf839SRobert Olsson /* 64119baf839SRobert Olsson * Halve as long as the number of empty children in this 64219baf839SRobert Olsson * node is above threshold. 64319baf839SRobert Olsson */ 6442f36895aSRobert Olsson 64580b71b80SJens Låås max_work = MAX_WORK; 64680b71b80SJens Låås while (tn->bits > 1 && max_work-- && 64719baf839SRobert Olsson 100 * (tnode_child_length(tn) - tn->empty_children) < 648e6308be8SRobert Olsson halve_threshold_use * tnode_child_length(tn)) { 64919baf839SRobert Olsson 6502f80b3c8SRobert Olsson old_tn = tn; 6512f80b3c8SRobert Olsson tn = halve(t, tn); 6522f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6532f80b3c8SRobert Olsson tn = old_tn; 6542f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6552f36895aSRobert Olsson t->stats.resize_node_skipped++; 6562f36895aSRobert Olsson #endif 6572f36895aSRobert Olsson break; 6582f36895aSRobert Olsson } 6592f36895aSRobert Olsson } 6602f36895aSRobert Olsson 66119baf839SRobert Olsson 66219baf839SRobert Olsson /* Only one child remains */ 66380b71b80SJens Låås if (tn->empty_children == tnode_child_length(tn) - 1) { 66480b71b80SJens Låås one_child: 66519baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 66691b9a277SOlof Johansson struct node *n; 66719baf839SRobert Olsson 66891b9a277SOlof Johansson n = tn->child[i]; 6692373ce1cSRobert Olsson if (!n) 67091b9a277SOlof Johansson continue; 67191b9a277SOlof Johansson 67291b9a277SOlof Johansson /* compress one level */ 67391b9a277SOlof Johansson 67406801916SStephen Hemminger node_set_parent(n, NULL); 675e0f7cb8cSJarek Poplawski tnode_free_safe(tn); 67619baf839SRobert Olsson return n; 67719baf839SRobert Olsson } 67880b71b80SJens Låås } 67919baf839SRobert Olsson return (struct node *) tn; 68019baf839SRobert Olsson } 68119baf839SRobert Olsson 6822f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn) 68319baf839SRobert Olsson { 68419baf839SRobert Olsson struct tnode *oldtnode = tn; 68519baf839SRobert Olsson int olen = tnode_child_length(tn); 68619baf839SRobert Olsson int i; 68719baf839SRobert Olsson 6880c7770c7SStephen Hemminger pr_debug("In inflate\n"); 68919baf839SRobert Olsson 69019baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 69119baf839SRobert Olsson 6922f80b3c8SRobert Olsson if (!tn) 6932f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 6942f36895aSRobert Olsson 6952f36895aSRobert Olsson /* 6962f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 6972f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 6982f36895aSRobert Olsson * fails. In case of failure we return the oldnode and inflate 6992f36895aSRobert Olsson * of tnode is ignored. 7002f36895aSRobert Olsson */ 7012f36895aSRobert Olsson 7022f36895aSRobert Olsson for (i = 0; i < olen; i++) { 703a07f5f50SStephen Hemminger struct tnode *inode; 7042f36895aSRobert Olsson 705a07f5f50SStephen Hemminger inode = (struct tnode *) tnode_get_child(oldtnode, i); 7062f36895aSRobert Olsson if (inode && 7072f36895aSRobert Olsson IS_TNODE(inode) && 7082f36895aSRobert Olsson inode->pos == oldtnode->pos + oldtnode->bits && 7092f36895aSRobert Olsson inode->bits > 1) { 7102f36895aSRobert Olsson struct tnode *left, *right; 711ab66b4a7SStephen Hemminger t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; 7122f36895aSRobert Olsson 7132f36895aSRobert Olsson left = tnode_new(inode->key&(~m), inode->pos + 1, 7142f36895aSRobert Olsson inode->bits - 1); 7152f80b3c8SRobert Olsson if (!left) 7162f80b3c8SRobert Olsson goto nomem; 7172f36895aSRobert Olsson 7182f36895aSRobert Olsson right = tnode_new(inode->key|m, inode->pos + 1, 7192f36895aSRobert Olsson inode->bits - 1); 7202f36895aSRobert Olsson 7212f36895aSRobert Olsson if (!right) { 7222f80b3c8SRobert Olsson tnode_free(left); 7232f80b3c8SRobert Olsson goto nomem; 7242f36895aSRobert Olsson } 7252f36895aSRobert Olsson 7262f36895aSRobert Olsson put_child(t, tn, 2*i, (struct node *) left); 7272f36895aSRobert Olsson put_child(t, tn, 2*i+1, (struct node *) right); 7282f36895aSRobert Olsson } 7292f36895aSRobert Olsson } 7302f36895aSRobert Olsson 73119baf839SRobert Olsson for (i = 0; i < olen; i++) { 732c95aaf9aSStephen Hemminger struct tnode *inode; 73319baf839SRobert Olsson struct node *node = tnode_get_child(oldtnode, i); 73491b9a277SOlof Johansson struct tnode *left, *right; 73591b9a277SOlof Johansson int size, j; 73619baf839SRobert Olsson 73719baf839SRobert Olsson /* An empty child */ 73819baf839SRobert Olsson if (node == NULL) 73919baf839SRobert Olsson continue; 74019baf839SRobert Olsson 74119baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 74219baf839SRobert Olsson 74319baf839SRobert Olsson if (IS_LEAF(node) || ((struct tnode *) node)->pos > 74419baf839SRobert Olsson tn->pos + tn->bits - 1) { 745a07f5f50SStephen Hemminger if (tkey_extract_bits(node->key, 746a07f5f50SStephen Hemminger oldtnode->pos + oldtnode->bits, 74719baf839SRobert Olsson 1) == 0) 74819baf839SRobert Olsson put_child(t, tn, 2*i, node); 74919baf839SRobert Olsson else 75019baf839SRobert Olsson put_child(t, tn, 2*i+1, node); 75119baf839SRobert Olsson continue; 75219baf839SRobert Olsson } 75319baf839SRobert Olsson 75419baf839SRobert Olsson /* An internal node with two children */ 75519baf839SRobert Olsson inode = (struct tnode *) node; 75619baf839SRobert Olsson 75719baf839SRobert Olsson if (inode->bits == 1) { 75819baf839SRobert Olsson put_child(t, tn, 2*i, inode->child[0]); 75919baf839SRobert Olsson put_child(t, tn, 2*i+1, inode->child[1]); 76019baf839SRobert Olsson 761e0f7cb8cSJarek Poplawski tnode_free_safe(inode); 76291b9a277SOlof Johansson continue; 76319baf839SRobert Olsson } 76419baf839SRobert Olsson 76519baf839SRobert Olsson /* An internal node with more than two children */ 76619baf839SRobert Olsson 76719baf839SRobert Olsson /* We will replace this node 'inode' with two new 76819baf839SRobert Olsson * ones, 'left' and 'right', each with half of the 76919baf839SRobert Olsson * original children. The two new nodes will have 77019baf839SRobert Olsson * a position one bit further down the key and this 77119baf839SRobert Olsson * means that the "significant" part of their keys 77219baf839SRobert Olsson * (see the discussion near the top of this file) 77319baf839SRobert Olsson * will differ by one bit, which will be "0" in 77419baf839SRobert Olsson * left's key and "1" in right's key. Since we are 77519baf839SRobert Olsson * moving the key position by one step, the bit that 77619baf839SRobert Olsson * we are moving away from - the bit at position 77719baf839SRobert Olsson * (inode->pos) - is the one that will differ between 77819baf839SRobert Olsson * left and right. So... we synthesize that bit in the 77919baf839SRobert Olsson * two new keys. 78019baf839SRobert Olsson * The mask 'm' below will be a single "one" bit at 78119baf839SRobert Olsson * the position (inode->pos) 78219baf839SRobert Olsson */ 78319baf839SRobert Olsson 78419baf839SRobert Olsson /* Use the old key, but set the new significant 78519baf839SRobert Olsson * bit to zero. 78619baf839SRobert Olsson */ 7872f36895aSRobert Olsson 7882f36895aSRobert Olsson left = (struct tnode *) tnode_get_child(tn, 2*i); 7892f36895aSRobert Olsson put_child(t, tn, 2*i, NULL); 79019baf839SRobert Olsson 79191b9a277SOlof Johansson BUG_ON(!left); 79219baf839SRobert Olsson 7932f36895aSRobert Olsson right = (struct tnode *) tnode_get_child(tn, 2*i+1); 7942f36895aSRobert Olsson put_child(t, tn, 2*i+1, NULL); 79519baf839SRobert Olsson 79691b9a277SOlof Johansson BUG_ON(!right); 79719baf839SRobert Olsson 79819baf839SRobert Olsson size = tnode_child_length(left); 79919baf839SRobert Olsson for (j = 0; j < size; j++) { 80019baf839SRobert Olsson put_child(t, left, j, inode->child[j]); 80119baf839SRobert Olsson put_child(t, right, j, inode->child[j + size]); 80219baf839SRobert Olsson } 80319baf839SRobert Olsson put_child(t, tn, 2*i, resize(t, left)); 80419baf839SRobert Olsson put_child(t, tn, 2*i+1, resize(t, right)); 80519baf839SRobert Olsson 806e0f7cb8cSJarek Poplawski tnode_free_safe(inode); 80719baf839SRobert Olsson } 808e0f7cb8cSJarek Poplawski tnode_free_safe(oldtnode); 80919baf839SRobert Olsson return tn; 8102f80b3c8SRobert Olsson nomem: 8112f80b3c8SRobert Olsson { 8122f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8132f80b3c8SRobert Olsson int j; 8142f80b3c8SRobert Olsson 8152f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8162f80b3c8SRobert Olsson if (tn->child[j]) 8172f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8182f80b3c8SRobert Olsson 8192f80b3c8SRobert Olsson tnode_free(tn); 8202f80b3c8SRobert Olsson 8212f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8222f80b3c8SRobert Olsson } 82319baf839SRobert Olsson } 82419baf839SRobert Olsson 8252f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn) 82619baf839SRobert Olsson { 82719baf839SRobert Olsson struct tnode *oldtnode = tn; 82819baf839SRobert Olsson struct node *left, *right; 82919baf839SRobert Olsson int i; 83019baf839SRobert Olsson int olen = tnode_child_length(tn); 83119baf839SRobert Olsson 8320c7770c7SStephen Hemminger pr_debug("In halve\n"); 83319baf839SRobert Olsson 83419baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 83519baf839SRobert Olsson 8362f80b3c8SRobert Olsson if (!tn) 8372f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8382f36895aSRobert Olsson 8392f36895aSRobert Olsson /* 8402f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 8412f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 8422f36895aSRobert Olsson * fails. In case of failure we return the oldnode and halve 8432f36895aSRobert Olsson * of tnode is ignored. 8442f36895aSRobert Olsson */ 8452f36895aSRobert Olsson 8462f36895aSRobert Olsson for (i = 0; i < olen; i += 2) { 8472f36895aSRobert Olsson left = tnode_get_child(oldtnode, i); 8482f36895aSRobert Olsson right = tnode_get_child(oldtnode, i+1); 8492f36895aSRobert Olsson 8502f36895aSRobert Olsson /* Two nonempty children */ 8512f36895aSRobert Olsson if (left && right) { 8522f80b3c8SRobert Olsson struct tnode *newn; 8532f36895aSRobert Olsson 8542f80b3c8SRobert Olsson newn = tnode_new(left->key, tn->pos + tn->bits, 1); 8552f80b3c8SRobert Olsson 8562f80b3c8SRobert Olsson if (!newn) 8572f80b3c8SRobert Olsson goto nomem; 8582f80b3c8SRobert Olsson 8592f80b3c8SRobert Olsson put_child(t, tn, i/2, (struct node *)newn); 8602f36895aSRobert Olsson } 8612f36895aSRobert Olsson 8622f36895aSRobert Olsson } 86319baf839SRobert Olsson 86419baf839SRobert Olsson for (i = 0; i < olen; i += 2) { 86591b9a277SOlof Johansson struct tnode *newBinNode; 86691b9a277SOlof Johansson 86719baf839SRobert Olsson left = tnode_get_child(oldtnode, i); 86819baf839SRobert Olsson right = tnode_get_child(oldtnode, i+1); 86919baf839SRobert Olsson 87019baf839SRobert Olsson /* At least one of the children is empty */ 87119baf839SRobert Olsson if (left == NULL) { 87219baf839SRobert Olsson if (right == NULL) /* Both are empty */ 87319baf839SRobert Olsson continue; 87419baf839SRobert Olsson put_child(t, tn, i/2, right); 87591b9a277SOlof Johansson continue; 87691b9a277SOlof Johansson } 87791b9a277SOlof Johansson 87891b9a277SOlof Johansson if (right == NULL) { 87919baf839SRobert Olsson put_child(t, tn, i/2, left); 88091b9a277SOlof Johansson continue; 88191b9a277SOlof Johansson } 88219baf839SRobert Olsson 88319baf839SRobert Olsson /* Two nonempty children */ 88491b9a277SOlof Johansson newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 8852f36895aSRobert Olsson put_child(t, tn, i/2, NULL); 88619baf839SRobert Olsson put_child(t, newBinNode, 0, left); 88719baf839SRobert Olsson put_child(t, newBinNode, 1, right); 88819baf839SRobert Olsson put_child(t, tn, i/2, resize(t, newBinNode)); 88919baf839SRobert Olsson } 890e0f7cb8cSJarek Poplawski tnode_free_safe(oldtnode); 89119baf839SRobert Olsson return tn; 8922f80b3c8SRobert Olsson nomem: 8932f80b3c8SRobert Olsson { 8942f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8952f80b3c8SRobert Olsson int j; 8962f80b3c8SRobert Olsson 8972f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8982f80b3c8SRobert Olsson if (tn->child[j]) 8992f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 9002f80b3c8SRobert Olsson 9012f80b3c8SRobert Olsson tnode_free(tn); 9022f80b3c8SRobert Olsson 9032f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 9042f80b3c8SRobert Olsson } 90519baf839SRobert Olsson } 90619baf839SRobert Olsson 907772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 9082373ce1cSRobert Olsson via get_fa_head and dump */ 9092373ce1cSRobert Olsson 910772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen) 91119baf839SRobert Olsson { 912772cb712SRobert Olsson struct hlist_head *head = &l->list; 91319baf839SRobert Olsson struct hlist_node *node; 91419baf839SRobert Olsson struct leaf_info *li; 91519baf839SRobert Olsson 9162373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, head, hlist) 91719baf839SRobert Olsson if (li->plen == plen) 91819baf839SRobert Olsson return li; 91991b9a277SOlof Johansson 92019baf839SRobert Olsson return NULL; 92119baf839SRobert Olsson } 92219baf839SRobert Olsson 92319baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen) 92419baf839SRobert Olsson { 925772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 92619baf839SRobert Olsson 92791b9a277SOlof Johansson if (!li) 92891b9a277SOlof Johansson return NULL; 92919baf839SRobert Olsson 93091b9a277SOlof Johansson return &li->falh; 93119baf839SRobert Olsson } 93219baf839SRobert Olsson 93319baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 93419baf839SRobert Olsson { 93519baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 93691b9a277SOlof Johansson struct hlist_node *node; 93719baf839SRobert Olsson 93891b9a277SOlof Johansson if (hlist_empty(head)) { 9392373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 94091b9a277SOlof Johansson } else { 94191b9a277SOlof Johansson hlist_for_each_entry(li, node, head, hlist) { 94219baf839SRobert Olsson if (new->plen > li->plen) 94319baf839SRobert Olsson break; 94419baf839SRobert Olsson 94519baf839SRobert Olsson last = li; 94619baf839SRobert Olsson } 94719baf839SRobert Olsson if (last) 9482373ce1cSRobert Olsson hlist_add_after_rcu(&last->hlist, &new->hlist); 94919baf839SRobert Olsson else 9502373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 95119baf839SRobert Olsson } 95219baf839SRobert Olsson } 95319baf839SRobert Olsson 9542373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 9552373ce1cSRobert Olsson 95619baf839SRobert Olsson static struct leaf * 95719baf839SRobert Olsson fib_find_node(struct trie *t, u32 key) 95819baf839SRobert Olsson { 95919baf839SRobert Olsson int pos; 96019baf839SRobert Olsson struct tnode *tn; 96119baf839SRobert Olsson struct node *n; 96219baf839SRobert Olsson 96319baf839SRobert Olsson pos = 0; 9642373ce1cSRobert Olsson n = rcu_dereference(t->trie); 96519baf839SRobert Olsson 96619baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 96719baf839SRobert Olsson tn = (struct tnode *) n; 96819baf839SRobert Olsson 96919baf839SRobert Olsson check_tnode(tn); 97019baf839SRobert Olsson 97119baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 97219baf839SRobert Olsson pos = tn->pos + tn->bits; 973a07f5f50SStephen Hemminger n = tnode_get_child_rcu(tn, 974a07f5f50SStephen Hemminger tkey_extract_bits(key, 975a07f5f50SStephen Hemminger tn->pos, 976a07f5f50SStephen Hemminger tn->bits)); 97791b9a277SOlof Johansson } else 97819baf839SRobert Olsson break; 97919baf839SRobert Olsson } 98019baf839SRobert Olsson /* Case we have found a leaf. Compare prefixes */ 98119baf839SRobert Olsson 98291b9a277SOlof Johansson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) 98391b9a277SOlof Johansson return (struct leaf *)n; 98491b9a277SOlof Johansson 98519baf839SRobert Olsson return NULL; 98619baf839SRobert Olsson } 98719baf839SRobert Olsson 9887b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn) 98919baf839SRobert Olsson { 99019baf839SRobert Olsson int wasfull; 9913ed18d76SRobert Olsson t_key cindex, key; 99206801916SStephen Hemminger struct tnode *tp; 99319baf839SRobert Olsson 9943ed18d76SRobert Olsson key = tn->key; 9953ed18d76SRobert Olsson 99606801916SStephen Hemminger while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 99719baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 99819baf839SRobert Olsson wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 99919baf839SRobert Olsson tn = (struct tnode *) resize(t, (struct tnode *)tn); 1000a07f5f50SStephen Hemminger 1001a07f5f50SStephen Hemminger tnode_put_child_reorg((struct tnode *)tp, cindex, 1002a07f5f50SStephen Hemminger (struct node *)tn, wasfull); 100319baf839SRobert Olsson 100406801916SStephen Hemminger tp = node_parent((struct node *) tn); 1005008440e3SJarek Poplawski if (!tp) 1006008440e3SJarek Poplawski rcu_assign_pointer(t->trie, (struct node *)tn); 1007008440e3SJarek Poplawski 1008e0f7cb8cSJarek Poplawski tnode_free_flush(); 100906801916SStephen Hemminger if (!tp) 101019baf839SRobert Olsson break; 101106801916SStephen Hemminger tn = tp; 101219baf839SRobert Olsson } 101306801916SStephen Hemminger 101419baf839SRobert Olsson /* Handle last (top) tnode */ 10157b85576dSJarek Poplawski if (IS_TNODE(tn)) 101619baf839SRobert Olsson tn = (struct tnode *)resize(t, (struct tnode *)tn); 101719baf839SRobert Olsson 10187b85576dSJarek Poplawski rcu_assign_pointer(t->trie, (struct node *)tn); 10197b85576dSJarek Poplawski tnode_free_flush(); 10207b85576dSJarek Poplawski 10217b85576dSJarek Poplawski return; 102219baf839SRobert Olsson } 102319baf839SRobert Olsson 10242373ce1cSRobert Olsson /* only used from updater-side */ 10252373ce1cSRobert Olsson 1026fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 102719baf839SRobert Olsson { 102819baf839SRobert Olsson int pos, newpos; 102919baf839SRobert Olsson struct tnode *tp = NULL, *tn = NULL; 103019baf839SRobert Olsson struct node *n; 103119baf839SRobert Olsson struct leaf *l; 103219baf839SRobert Olsson int missbit; 103319baf839SRobert Olsson struct list_head *fa_head = NULL; 103419baf839SRobert Olsson struct leaf_info *li; 103519baf839SRobert Olsson t_key cindex; 103619baf839SRobert Olsson 103719baf839SRobert Olsson pos = 0; 103819baf839SRobert Olsson n = t->trie; 103919baf839SRobert Olsson 104019baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 104119baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 104219baf839SRobert Olsson * and we should just put our new leaf in that. 104319baf839SRobert Olsson * If we point to a T_TNODE, check if it matches our key. Note that 104419baf839SRobert Olsson * a T_TNODE might be skipping any number of bits - its 'pos' need 104519baf839SRobert Olsson * not be the parent's 'pos'+'bits'! 104619baf839SRobert Olsson * 104719baf839SRobert Olsson * If it does match the current key, get pos/bits from it, extract 104819baf839SRobert Olsson * the index from our key, push the T_TNODE and walk the tree. 104919baf839SRobert Olsson * 105019baf839SRobert Olsson * If it doesn't, we have to replace it with a new T_TNODE. 105119baf839SRobert Olsson * 105219baf839SRobert Olsson * If we point to a T_LEAF, it might or might not have the same key 105319baf839SRobert Olsson * as we do. If it does, just change the value, update the T_LEAF's 105419baf839SRobert Olsson * value, and return it. 105519baf839SRobert Olsson * If it doesn't, we need to replace it with a T_TNODE. 105619baf839SRobert Olsson */ 105719baf839SRobert Olsson 105819baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 105919baf839SRobert Olsson tn = (struct tnode *) n; 106019baf839SRobert Olsson 106119baf839SRobert Olsson check_tnode(tn); 106219baf839SRobert Olsson 106319baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 106419baf839SRobert Olsson tp = tn; 106519baf839SRobert Olsson pos = tn->pos + tn->bits; 1066a07f5f50SStephen Hemminger n = tnode_get_child(tn, 1067a07f5f50SStephen Hemminger tkey_extract_bits(key, 1068a07f5f50SStephen Hemminger tn->pos, 1069a07f5f50SStephen Hemminger tn->bits)); 107019baf839SRobert Olsson 107106801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 107291b9a277SOlof Johansson } else 107319baf839SRobert Olsson break; 107419baf839SRobert Olsson } 107519baf839SRobert Olsson 107619baf839SRobert Olsson /* 107719baf839SRobert Olsson * n ----> NULL, LEAF or TNODE 107819baf839SRobert Olsson * 107919baf839SRobert Olsson * tp is n's (parent) ----> NULL or TNODE 108019baf839SRobert Olsson */ 108119baf839SRobert Olsson 108291b9a277SOlof Johansson BUG_ON(tp && IS_LEAF(tp)); 108319baf839SRobert Olsson 108419baf839SRobert Olsson /* Case 1: n is a leaf. Compare prefixes */ 108519baf839SRobert Olsson 108619baf839SRobert Olsson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1087c95aaf9aSStephen Hemminger l = (struct leaf *) n; 108819baf839SRobert Olsson li = leaf_info_new(plen); 108919baf839SRobert Olsson 1090fea86ad8SStephen Hemminger if (!li) 1091fea86ad8SStephen Hemminger return NULL; 109219baf839SRobert Olsson 109319baf839SRobert Olsson fa_head = &li->falh; 109419baf839SRobert Olsson insert_leaf_info(&l->list, li); 109519baf839SRobert Olsson goto done; 109619baf839SRobert Olsson } 109719baf839SRobert Olsson l = leaf_new(); 109819baf839SRobert Olsson 1099fea86ad8SStephen Hemminger if (!l) 1100fea86ad8SStephen Hemminger return NULL; 110119baf839SRobert Olsson 110219baf839SRobert Olsson l->key = key; 110319baf839SRobert Olsson li = leaf_info_new(plen); 110419baf839SRobert Olsson 1105f835e471SRobert Olsson if (!li) { 1106387a5487SStephen Hemminger free_leaf(l); 1107fea86ad8SStephen Hemminger return NULL; 1108f835e471SRobert Olsson } 110919baf839SRobert Olsson 111019baf839SRobert Olsson fa_head = &li->falh; 111119baf839SRobert Olsson insert_leaf_info(&l->list, li); 111219baf839SRobert Olsson 111319baf839SRobert Olsson if (t->trie && n == NULL) { 111491b9a277SOlof Johansson /* Case 2: n is NULL, and will just insert a new leaf */ 111519baf839SRobert Olsson 111606801916SStephen Hemminger node_set_parent((struct node *)l, tp); 111719baf839SRobert Olsson 111819baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 111919baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 112091b9a277SOlof Johansson } else { 112119baf839SRobert Olsson /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 112219baf839SRobert Olsson /* 112319baf839SRobert Olsson * Add a new tnode here 112419baf839SRobert Olsson * first tnode need some special handling 112519baf839SRobert Olsson */ 112619baf839SRobert Olsson 112719baf839SRobert Olsson if (tp) 112819baf839SRobert Olsson pos = tp->pos+tp->bits; 112919baf839SRobert Olsson else 113019baf839SRobert Olsson pos = 0; 113191b9a277SOlof Johansson 113219baf839SRobert Olsson if (n) { 113319baf839SRobert Olsson newpos = tkey_mismatch(key, pos, n->key); 113419baf839SRobert Olsson tn = tnode_new(n->key, newpos, 1); 113591b9a277SOlof Johansson } else { 113619baf839SRobert Olsson newpos = 0; 113719baf839SRobert Olsson tn = tnode_new(key, newpos, 1); /* First tnode */ 113819baf839SRobert Olsson } 1139f835e471SRobert Olsson 1140f835e471SRobert Olsson if (!tn) { 1141f835e471SRobert Olsson free_leaf_info(li); 1142387a5487SStephen Hemminger free_leaf(l); 1143fea86ad8SStephen Hemminger return NULL; 1144f835e471SRobert Olsson } 114519baf839SRobert Olsson 114606801916SStephen Hemminger node_set_parent((struct node *)tn, tp); 114719baf839SRobert Olsson 114819baf839SRobert Olsson missbit = tkey_extract_bits(key, newpos, 1); 114919baf839SRobert Olsson put_child(t, tn, missbit, (struct node *)l); 115019baf839SRobert Olsson put_child(t, tn, 1-missbit, n); 115119baf839SRobert Olsson 115219baf839SRobert Olsson if (tp) { 115319baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1154a07f5f50SStephen Hemminger put_child(t, (struct tnode *)tp, cindex, 1155a07f5f50SStephen Hemminger (struct node *)tn); 115691b9a277SOlof Johansson } else { 1157a07f5f50SStephen Hemminger rcu_assign_pointer(t->trie, (struct node *)tn); 115819baf839SRobert Olsson tp = tn; 115919baf839SRobert Olsson } 116019baf839SRobert Olsson } 116191b9a277SOlof Johansson 116291b9a277SOlof Johansson if (tp && tp->pos + tp->bits > 32) 1163a07f5f50SStephen Hemminger pr_warning("fib_trie" 1164a07f5f50SStephen Hemminger " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 116519baf839SRobert Olsson tp, tp->pos, tp->bits, key, plen); 116691b9a277SOlof Johansson 116719baf839SRobert Olsson /* Rebalance the trie */ 11682373ce1cSRobert Olsson 11697b85576dSJarek Poplawski trie_rebalance(t, tp); 1170f835e471SRobert Olsson done: 117119baf839SRobert Olsson return fa_head; 117219baf839SRobert Olsson } 117319baf839SRobert Olsson 1174d562f1f8SRobert Olsson /* 1175d562f1f8SRobert Olsson * Caller must hold RTNL. 1176d562f1f8SRobert Olsson */ 11774e902c57SThomas Graf static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) 117819baf839SRobert Olsson { 117919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 118019baf839SRobert Olsson struct fib_alias *fa, *new_fa; 118119baf839SRobert Olsson struct list_head *fa_head = NULL; 118219baf839SRobert Olsson struct fib_info *fi; 11834e902c57SThomas Graf int plen = cfg->fc_dst_len; 11844e902c57SThomas Graf u8 tos = cfg->fc_tos; 118519baf839SRobert Olsson u32 key, mask; 118619baf839SRobert Olsson int err; 118719baf839SRobert Olsson struct leaf *l; 118819baf839SRobert Olsson 118919baf839SRobert Olsson if (plen > 32) 119019baf839SRobert Olsson return -EINVAL; 119119baf839SRobert Olsson 11924e902c57SThomas Graf key = ntohl(cfg->fc_dst); 119319baf839SRobert Olsson 11942dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 119519baf839SRobert Olsson 119619baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 119719baf839SRobert Olsson 119819baf839SRobert Olsson if (key & ~mask) 119919baf839SRobert Olsson return -EINVAL; 120019baf839SRobert Olsson 120119baf839SRobert Olsson key = key & mask; 120219baf839SRobert Olsson 12034e902c57SThomas Graf fi = fib_create_info(cfg); 12044e902c57SThomas Graf if (IS_ERR(fi)) { 12054e902c57SThomas Graf err = PTR_ERR(fi); 120619baf839SRobert Olsson goto err; 12074e902c57SThomas Graf } 120819baf839SRobert Olsson 120919baf839SRobert Olsson l = fib_find_node(t, key); 121019baf839SRobert Olsson fa = NULL; 121119baf839SRobert Olsson 121219baf839SRobert Olsson if (l) { 121319baf839SRobert Olsson fa_head = get_fa_head(l, plen); 121419baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 121519baf839SRobert Olsson } 121619baf839SRobert Olsson 121719baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 121819baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 121919baf839SRobert Olsson * exists or to the node before which we will insert new one. 122019baf839SRobert Olsson * 122119baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 122219baf839SRobert Olsson * insert to the head of f. 122319baf839SRobert Olsson * 122419baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 122519baf839SRobert Olsson * and we need to allocate a new one of those as well. 122619baf839SRobert Olsson */ 122719baf839SRobert Olsson 1228936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1229936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1230936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 123119baf839SRobert Olsson 123219baf839SRobert Olsson err = -EEXIST; 12334e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 123419baf839SRobert Olsson goto out; 123519baf839SRobert Olsson 1236936f6f8eSJulian Anastasov /* We have 2 goals: 1237936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1238936f6f8eSJulian Anastasov * duplicate routes 1239936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1240936f6f8eSJulian Anastasov */ 1241936f6f8eSJulian Anastasov fa_match = NULL; 1242936f6f8eSJulian Anastasov fa_first = fa; 1243936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1244936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 1245936f6f8eSJulian Anastasov if (fa->fa_tos != tos) 1246936f6f8eSJulian Anastasov break; 1247936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1248936f6f8eSJulian Anastasov break; 1249936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1250936f6f8eSJulian Anastasov fa->fa_scope == cfg->fc_scope && 1251936f6f8eSJulian Anastasov fa->fa_info == fi) { 1252936f6f8eSJulian Anastasov fa_match = fa; 1253936f6f8eSJulian Anastasov break; 1254936f6f8eSJulian Anastasov } 1255936f6f8eSJulian Anastasov } 1256936f6f8eSJulian Anastasov 12574e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 125819baf839SRobert Olsson struct fib_info *fi_drop; 125919baf839SRobert Olsson u8 state; 126019baf839SRobert Olsson 1261936f6f8eSJulian Anastasov fa = fa_first; 1262936f6f8eSJulian Anastasov if (fa_match) { 1263936f6f8eSJulian Anastasov if (fa == fa_match) 1264936f6f8eSJulian Anastasov err = 0; 12656725033fSJoonwoo Park goto out; 1266936f6f8eSJulian Anastasov } 12672373ce1cSRobert Olsson err = -ENOBUFS; 1268e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 12692373ce1cSRobert Olsson if (new_fa == NULL) 12702373ce1cSRobert Olsson goto out; 127119baf839SRobert Olsson 127219baf839SRobert Olsson fi_drop = fa->fa_info; 12732373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12742373ce1cSRobert Olsson new_fa->fa_info = fi; 12754e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12764e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 127719baf839SRobert Olsson state = fa->fa_state; 1278936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 127919baf839SRobert Olsson 12802373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12812373ce1cSRobert Olsson alias_free_mem_rcu(fa); 128219baf839SRobert Olsson 128319baf839SRobert Olsson fib_release_info(fi_drop); 128419baf839SRobert Olsson if (state & FA_S_ACCESSED) 128576e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 1286b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1287b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 128819baf839SRobert Olsson 128919baf839SRobert Olsson goto succeeded; 129019baf839SRobert Olsson } 129119baf839SRobert Olsson /* Error if we find a perfect match which 129219baf839SRobert Olsson * uses the same scope, type, and nexthop 129319baf839SRobert Olsson * information. 129419baf839SRobert Olsson */ 1295936f6f8eSJulian Anastasov if (fa_match) 129619baf839SRobert Olsson goto out; 1297a07f5f50SStephen Hemminger 12984e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1299936f6f8eSJulian Anastasov fa = fa_first; 130019baf839SRobert Olsson } 130119baf839SRobert Olsson err = -ENOENT; 13024e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 130319baf839SRobert Olsson goto out; 130419baf839SRobert Olsson 130519baf839SRobert Olsson err = -ENOBUFS; 1306e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 130719baf839SRobert Olsson if (new_fa == NULL) 130819baf839SRobert Olsson goto out; 130919baf839SRobert Olsson 131019baf839SRobert Olsson new_fa->fa_info = fi; 131119baf839SRobert Olsson new_fa->fa_tos = tos; 13124e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 13134e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 131419baf839SRobert Olsson new_fa->fa_state = 0; 131519baf839SRobert Olsson /* 131619baf839SRobert Olsson * Insert new entry to the list. 131719baf839SRobert Olsson */ 131819baf839SRobert Olsson 1319f835e471SRobert Olsson if (!fa_head) { 1320fea86ad8SStephen Hemminger fa_head = fib_insert_node(t, key, plen); 1321fea86ad8SStephen Hemminger if (unlikely(!fa_head)) { 1322fea86ad8SStephen Hemminger err = -ENOMEM; 1323f835e471SRobert Olsson goto out_free_new_fa; 1324f835e471SRobert Olsson } 1325fea86ad8SStephen Hemminger } 132619baf839SRobert Olsson 13272373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 13282373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 132919baf839SRobert Olsson 133076e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 13314e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1332b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 133319baf839SRobert Olsson succeeded: 133419baf839SRobert Olsson return 0; 1335f835e471SRobert Olsson 1336f835e471SRobert Olsson out_free_new_fa: 1337f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 133819baf839SRobert Olsson out: 133919baf839SRobert Olsson fib_release_info(fi); 134091b9a277SOlof Johansson err: 134119baf839SRobert Olsson return err; 134219baf839SRobert Olsson } 134319baf839SRobert Olsson 1344772cb712SRobert Olsson /* should be called with rcu_read_lock */ 1345a07f5f50SStephen Hemminger static int check_leaf(struct trie *t, struct leaf *l, 1346a07f5f50SStephen Hemminger t_key key, const struct flowi *flp, 134706c74270SPatrick McHardy struct fib_result *res) 134819baf839SRobert Olsson { 134919baf839SRobert Olsson struct leaf_info *li; 135019baf839SRobert Olsson struct hlist_head *hhead = &l->list; 135119baf839SRobert Olsson struct hlist_node *node; 135219baf839SRobert Olsson 13532373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1354a07f5f50SStephen Hemminger int err; 1355a07f5f50SStephen Hemminger int plen = li->plen; 1356a07f5f50SStephen Hemminger __be32 mask = inet_make_mask(plen); 1357a07f5f50SStephen Hemminger 1358888454c5SAl Viro if (l->key != (key & ntohl(mask))) 135919baf839SRobert Olsson continue; 136019baf839SRobert Olsson 1361e204a345SRami Rosen err = fib_semantic_match(&li->falh, flp, res, plen); 1362a07f5f50SStephen Hemminger 136319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1364a07f5f50SStephen Hemminger if (err <= 0) 136519baf839SRobert Olsson t->stats.semantic_match_passed++; 1366a07f5f50SStephen Hemminger else 136719baf839SRobert Olsson t->stats.semantic_match_miss++; 136819baf839SRobert Olsson #endif 1369a07f5f50SStephen Hemminger if (err <= 0) 13702e655571SBen Hutchings return err; 137119baf839SRobert Olsson } 137219baf839SRobert Olsson 13732e655571SBen Hutchings return 1; 1374a07f5f50SStephen Hemminger } 1375a07f5f50SStephen Hemminger 1376a07f5f50SStephen Hemminger static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, 1377a07f5f50SStephen Hemminger struct fib_result *res) 137819baf839SRobert Olsson { 137919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 13802e655571SBen Hutchings int ret; 138119baf839SRobert Olsson struct node *n; 138219baf839SRobert Olsson struct tnode *pn; 138319baf839SRobert Olsson int pos, bits; 138419baf839SRobert Olsson t_key key = ntohl(flp->fl4_dst); 138519baf839SRobert Olsson int chopped_off; 138619baf839SRobert Olsson t_key cindex = 0; 138719baf839SRobert Olsson int current_prefix_length = KEYLENGTH; 138891b9a277SOlof Johansson struct tnode *cn; 138991b9a277SOlof Johansson t_key node_prefix, key_prefix, pref_mismatch; 139091b9a277SOlof Johansson int mp; 139191b9a277SOlof Johansson 13922373ce1cSRobert Olsson rcu_read_lock(); 139319baf839SRobert Olsson 13942373ce1cSRobert Olsson n = rcu_dereference(t->trie); 139519baf839SRobert Olsson if (!n) 139619baf839SRobert Olsson goto failed; 139719baf839SRobert Olsson 139819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 139919baf839SRobert Olsson t->stats.gets++; 140019baf839SRobert Olsson #endif 140119baf839SRobert Olsson 140219baf839SRobert Olsson /* Just a leaf? */ 140319baf839SRobert Olsson if (IS_LEAF(n)) { 14042e655571SBen Hutchings ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1405a07f5f50SStephen Hemminger goto found; 140619baf839SRobert Olsson } 1407a07f5f50SStephen Hemminger 140819baf839SRobert Olsson pn = (struct tnode *) n; 140919baf839SRobert Olsson chopped_off = 0; 141019baf839SRobert Olsson 141119baf839SRobert Olsson while (pn) { 141219baf839SRobert Olsson pos = pn->pos; 141319baf839SRobert Olsson bits = pn->bits; 141419baf839SRobert Olsson 141519baf839SRobert Olsson if (!chopped_off) 1416ab66b4a7SStephen Hemminger cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1417ab66b4a7SStephen Hemminger pos, bits); 141819baf839SRobert Olsson 1419b902e573SJarek Poplawski n = tnode_get_child_rcu(pn, cindex); 142019baf839SRobert Olsson 142119baf839SRobert Olsson if (n == NULL) { 142219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 142319baf839SRobert Olsson t->stats.null_node_hit++; 142419baf839SRobert Olsson #endif 142519baf839SRobert Olsson goto backtrace; 142619baf839SRobert Olsson } 142719baf839SRobert Olsson 142891b9a277SOlof Johansson if (IS_LEAF(n)) { 14292e655571SBen Hutchings ret = check_leaf(t, (struct leaf *)n, key, flp, res); 14302e655571SBen Hutchings if (ret > 0) 143191b9a277SOlof Johansson goto backtrace; 1432a07f5f50SStephen Hemminger goto found; 143391b9a277SOlof Johansson } 143491b9a277SOlof Johansson 143591b9a277SOlof Johansson cn = (struct tnode *)n; 143619baf839SRobert Olsson 143719baf839SRobert Olsson /* 143819baf839SRobert Olsson * It's a tnode, and we can do some extra checks here if we 143919baf839SRobert Olsson * like, to avoid descending into a dead-end branch. 144019baf839SRobert Olsson * This tnode is in the parent's child array at index 144119baf839SRobert Olsson * key[p_pos..p_pos+p_bits] but potentially with some bits 144219baf839SRobert Olsson * chopped off, so in reality the index may be just a 144319baf839SRobert Olsson * subprefix, padded with zero at the end. 144419baf839SRobert Olsson * We can also take a look at any skipped bits in this 144519baf839SRobert Olsson * tnode - everything up to p_pos is supposed to be ok, 144619baf839SRobert Olsson * and the non-chopped bits of the index (se previous 144719baf839SRobert Olsson * paragraph) are also guaranteed ok, but the rest is 144819baf839SRobert Olsson * considered unknown. 144919baf839SRobert Olsson * 145019baf839SRobert Olsson * The skipped bits are key[pos+bits..cn->pos]. 145119baf839SRobert Olsson */ 145219baf839SRobert Olsson 145319baf839SRobert Olsson /* If current_prefix_length < pos+bits, we are already doing 145419baf839SRobert Olsson * actual prefix matching, which means everything from 145519baf839SRobert Olsson * pos+(bits-chopped_off) onward must be zero along some 145619baf839SRobert Olsson * branch of this subtree - otherwise there is *no* valid 145719baf839SRobert Olsson * prefix present. Here we can only check the skipped 145819baf839SRobert Olsson * bits. Remember, since we have already indexed into the 145919baf839SRobert Olsson * parent's child array, we know that the bits we chopped of 146019baf839SRobert Olsson * *are* zero. 146119baf839SRobert Olsson */ 146219baf839SRobert Olsson 1463a07f5f50SStephen Hemminger /* NOTA BENE: Checking only skipped bits 1464a07f5f50SStephen Hemminger for the new node here */ 146519baf839SRobert Olsson 146619baf839SRobert Olsson if (current_prefix_length < pos+bits) { 146719baf839SRobert Olsson if (tkey_extract_bits(cn->key, current_prefix_length, 1468a07f5f50SStephen Hemminger cn->pos - current_prefix_length) 1469a07f5f50SStephen Hemminger || !(cn->child[0])) 147019baf839SRobert Olsson goto backtrace; 147119baf839SRobert Olsson } 147219baf839SRobert Olsson 147319baf839SRobert Olsson /* 147419baf839SRobert Olsson * If chopped_off=0, the index is fully validated and we 147519baf839SRobert Olsson * only need to look at the skipped bits for this, the new, 147619baf839SRobert Olsson * tnode. What we actually want to do is to find out if 147719baf839SRobert Olsson * these skipped bits match our key perfectly, or if we will 147819baf839SRobert Olsson * have to count on finding a matching prefix further down, 147919baf839SRobert Olsson * because if we do, we would like to have some way of 148019baf839SRobert Olsson * verifying the existence of such a prefix at this point. 148119baf839SRobert Olsson */ 148219baf839SRobert Olsson 148319baf839SRobert Olsson /* The only thing we can do at this point is to verify that 148419baf839SRobert Olsson * any such matching prefix can indeed be a prefix to our 148519baf839SRobert Olsson * key, and if the bits in the node we are inspecting that 148619baf839SRobert Olsson * do not match our key are not ZERO, this cannot be true. 148719baf839SRobert Olsson * Thus, find out where there is a mismatch (before cn->pos) 148819baf839SRobert Olsson * and verify that all the mismatching bits are zero in the 148919baf839SRobert Olsson * new tnode's key. 149019baf839SRobert Olsson */ 149119baf839SRobert Olsson 1492a07f5f50SStephen Hemminger /* 1493a07f5f50SStephen Hemminger * Note: We aren't very concerned about the piece of 1494a07f5f50SStephen Hemminger * the key that precede pn->pos+pn->bits, since these 1495a07f5f50SStephen Hemminger * have already been checked. The bits after cn->pos 1496a07f5f50SStephen Hemminger * aren't checked since these are by definition 1497a07f5f50SStephen Hemminger * "unknown" at this point. Thus, what we want to see 1498a07f5f50SStephen Hemminger * is if we are about to enter the "prefix matching" 1499a07f5f50SStephen Hemminger * state, and in that case verify that the skipped 1500a07f5f50SStephen Hemminger * bits that will prevail throughout this subtree are 1501a07f5f50SStephen Hemminger * zero, as they have to be if we are to find a 1502a07f5f50SStephen Hemminger * matching prefix. 150319baf839SRobert Olsson */ 150419baf839SRobert Olsson 1505ab66b4a7SStephen Hemminger node_prefix = mask_pfx(cn->key, cn->pos); 1506ab66b4a7SStephen Hemminger key_prefix = mask_pfx(key, cn->pos); 150719baf839SRobert Olsson pref_mismatch = key_prefix^node_prefix; 150819baf839SRobert Olsson mp = 0; 150919baf839SRobert Olsson 1510a07f5f50SStephen Hemminger /* 1511a07f5f50SStephen Hemminger * In short: If skipped bits in this node do not match 1512a07f5f50SStephen Hemminger * the search key, enter the "prefix matching" 1513a07f5f50SStephen Hemminger * state.directly. 151419baf839SRobert Olsson */ 151519baf839SRobert Olsson if (pref_mismatch) { 151619baf839SRobert Olsson while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 151719baf839SRobert Olsson mp++; 151819baf839SRobert Olsson pref_mismatch = pref_mismatch << 1; 151919baf839SRobert Olsson } 152019baf839SRobert Olsson key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 152119baf839SRobert Olsson 152219baf839SRobert Olsson if (key_prefix != 0) 152319baf839SRobert Olsson goto backtrace; 152419baf839SRobert Olsson 152519baf839SRobert Olsson if (current_prefix_length >= cn->pos) 152619baf839SRobert Olsson current_prefix_length = mp; 152719baf839SRobert Olsson } 1528a07f5f50SStephen Hemminger 152919baf839SRobert Olsson pn = (struct tnode *)n; /* Descend */ 153019baf839SRobert Olsson chopped_off = 0; 153119baf839SRobert Olsson continue; 153291b9a277SOlof Johansson 153319baf839SRobert Olsson backtrace: 153419baf839SRobert Olsson chopped_off++; 153519baf839SRobert Olsson 153619baf839SRobert Olsson /* As zero don't change the child key (cindex) */ 1537a07f5f50SStephen Hemminger while ((chopped_off <= pn->bits) 1538a07f5f50SStephen Hemminger && !(cindex & (1<<(chopped_off-1)))) 153919baf839SRobert Olsson chopped_off++; 154019baf839SRobert Olsson 154119baf839SRobert Olsson /* Decrease current_... with bits chopped off */ 154219baf839SRobert Olsson if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1543a07f5f50SStephen Hemminger current_prefix_length = pn->pos + pn->bits 1544a07f5f50SStephen Hemminger - chopped_off; 154519baf839SRobert Olsson 154619baf839SRobert Olsson /* 154719baf839SRobert Olsson * Either we do the actual chop off according or if we have 154819baf839SRobert Olsson * chopped off all bits in this tnode walk up to our parent. 154919baf839SRobert Olsson */ 155019baf839SRobert Olsson 155191b9a277SOlof Johansson if (chopped_off <= pn->bits) { 155219baf839SRobert Olsson cindex &= ~(1 << (chopped_off-1)); 155391b9a277SOlof Johansson } else { 1554b902e573SJarek Poplawski struct tnode *parent = node_parent_rcu((struct node *) pn); 155506801916SStephen Hemminger if (!parent) 155619baf839SRobert Olsson goto failed; 155719baf839SRobert Olsson 155819baf839SRobert Olsson /* Get Child's index */ 155906801916SStephen Hemminger cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); 156006801916SStephen Hemminger pn = parent; 156119baf839SRobert Olsson chopped_off = 0; 156219baf839SRobert Olsson 156319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 156419baf839SRobert Olsson t->stats.backtrack++; 156519baf839SRobert Olsson #endif 156619baf839SRobert Olsson goto backtrace; 156719baf839SRobert Olsson } 156819baf839SRobert Olsson } 156919baf839SRobert Olsson failed: 157019baf839SRobert Olsson ret = 1; 157119baf839SRobert Olsson found: 15722373ce1cSRobert Olsson rcu_read_unlock(); 157319baf839SRobert Olsson return ret; 157419baf839SRobert Olsson } 157519baf839SRobert Olsson 157619baf839SRobert Olsson /* 15779195bef7SStephen Hemminger * Remove the leaf and return parent. 157819baf839SRobert Olsson */ 15799195bef7SStephen Hemminger static void trie_leaf_remove(struct trie *t, struct leaf *l) 15809195bef7SStephen Hemminger { 15819195bef7SStephen Hemminger struct tnode *tp = node_parent((struct node *) l); 15829195bef7SStephen Hemminger 15839195bef7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", l); 158419baf839SRobert Olsson 158519baf839SRobert Olsson if (tp) { 15869195bef7SStephen Hemminger t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); 158719baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, NULL); 15887b85576dSJarek Poplawski trie_rebalance(t, tp); 158991b9a277SOlof Johansson } else 15902373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 159119baf839SRobert Olsson 1592387a5487SStephen Hemminger free_leaf(l); 159319baf839SRobert Olsson } 159419baf839SRobert Olsson 1595d562f1f8SRobert Olsson /* 1596d562f1f8SRobert Olsson * Caller must hold RTNL. 1597d562f1f8SRobert Olsson */ 15984e902c57SThomas Graf static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) 159919baf839SRobert Olsson { 160019baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 160119baf839SRobert Olsson u32 key, mask; 16024e902c57SThomas Graf int plen = cfg->fc_dst_len; 16034e902c57SThomas Graf u8 tos = cfg->fc_tos; 160419baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 160519baf839SRobert Olsson struct list_head *fa_head; 160619baf839SRobert Olsson struct leaf *l; 160791b9a277SOlof Johansson struct leaf_info *li; 160891b9a277SOlof Johansson 160919baf839SRobert Olsson if (plen > 32) 161019baf839SRobert Olsson return -EINVAL; 161119baf839SRobert Olsson 16124e902c57SThomas Graf key = ntohl(cfg->fc_dst); 161319baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 161419baf839SRobert Olsson 161519baf839SRobert Olsson if (key & ~mask) 161619baf839SRobert Olsson return -EINVAL; 161719baf839SRobert Olsson 161819baf839SRobert Olsson key = key & mask; 161919baf839SRobert Olsson l = fib_find_node(t, key); 162019baf839SRobert Olsson 162119baf839SRobert Olsson if (!l) 162219baf839SRobert Olsson return -ESRCH; 162319baf839SRobert Olsson 162419baf839SRobert Olsson fa_head = get_fa_head(l, plen); 162519baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 162619baf839SRobert Olsson 162719baf839SRobert Olsson if (!fa) 162819baf839SRobert Olsson return -ESRCH; 162919baf839SRobert Olsson 16300c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 163119baf839SRobert Olsson 163219baf839SRobert Olsson fa_to_delete = NULL; 1633936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1634936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 163519baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 163619baf839SRobert Olsson 163719baf839SRobert Olsson if (fa->fa_tos != tos) 163819baf839SRobert Olsson break; 163919baf839SRobert Olsson 16404e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 16414e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 16424e902c57SThomas Graf fa->fa_scope == cfg->fc_scope) && 16434e902c57SThomas Graf (!cfg->fc_protocol || 16444e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 16454e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 164619baf839SRobert Olsson fa_to_delete = fa; 164719baf839SRobert Olsson break; 164819baf839SRobert Olsson } 164919baf839SRobert Olsson } 165019baf839SRobert Olsson 165191b9a277SOlof Johansson if (!fa_to_delete) 165291b9a277SOlof Johansson return -ESRCH; 165319baf839SRobert Olsson 165419baf839SRobert Olsson fa = fa_to_delete; 16554e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1656b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 165719baf839SRobert Olsson 165819baf839SRobert Olsson l = fib_find_node(t, key); 1659772cb712SRobert Olsson li = find_leaf_info(l, plen); 166019baf839SRobert Olsson 16612373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 166219baf839SRobert Olsson 166319baf839SRobert Olsson if (list_empty(fa_head)) { 16642373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 166519baf839SRobert Olsson free_leaf_info(li); 16662373ce1cSRobert Olsson } 166719baf839SRobert Olsson 166819baf839SRobert Olsson if (hlist_empty(&l->list)) 16699195bef7SStephen Hemminger trie_leaf_remove(t, l); 167019baf839SRobert Olsson 167119baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 167276e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 167319baf839SRobert Olsson 16742373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16752373ce1cSRobert Olsson alias_free_mem_rcu(fa); 167619baf839SRobert Olsson return 0; 167719baf839SRobert Olsson } 167819baf839SRobert Olsson 1679ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head) 168019baf839SRobert Olsson { 168119baf839SRobert Olsson struct fib_alias *fa, *fa_node; 168219baf839SRobert Olsson int found = 0; 168319baf839SRobert Olsson 168419baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 168519baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 168619baf839SRobert Olsson 168719baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 16882373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 16892373ce1cSRobert Olsson fib_release_info(fa->fa_info); 16902373ce1cSRobert Olsson alias_free_mem_rcu(fa); 169119baf839SRobert Olsson found++; 169219baf839SRobert Olsson } 169319baf839SRobert Olsson } 169419baf839SRobert Olsson return found; 169519baf839SRobert Olsson } 169619baf839SRobert Olsson 1697ef3660ceSStephen Hemminger static int trie_flush_leaf(struct leaf *l) 169819baf839SRobert Olsson { 169919baf839SRobert Olsson int found = 0; 170019baf839SRobert Olsson struct hlist_head *lih = &l->list; 170119baf839SRobert Olsson struct hlist_node *node, *tmp; 170219baf839SRobert Olsson struct leaf_info *li = NULL; 170319baf839SRobert Olsson 170419baf839SRobert Olsson hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1705ef3660ceSStephen Hemminger found += trie_flush_list(&li->falh); 170619baf839SRobert Olsson 170719baf839SRobert Olsson if (list_empty(&li->falh)) { 17082373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 170919baf839SRobert Olsson free_leaf_info(li); 171019baf839SRobert Olsson } 171119baf839SRobert Olsson } 171219baf839SRobert Olsson return found; 171319baf839SRobert Olsson } 171419baf839SRobert Olsson 171582cfbb00SStephen Hemminger /* 171682cfbb00SStephen Hemminger * Scan for the next right leaf starting at node p->child[idx] 171782cfbb00SStephen Hemminger * Since we have back pointer, no recursion necessary. 171882cfbb00SStephen Hemminger */ 171982cfbb00SStephen Hemminger static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) 172019baf839SRobert Olsson { 172182cfbb00SStephen Hemminger do { 172282cfbb00SStephen Hemminger t_key idx; 172319baf839SRobert Olsson 172419baf839SRobert Olsson if (c) 172582cfbb00SStephen Hemminger idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; 172619baf839SRobert Olsson else 172782cfbb00SStephen Hemminger idx = 0; 172819baf839SRobert Olsson 172982cfbb00SStephen Hemminger while (idx < 1u << p->bits) { 173082cfbb00SStephen Hemminger c = tnode_get_child_rcu(p, idx++); 17312373ce1cSRobert Olsson if (!c) 173291b9a277SOlof Johansson continue; 173319baf839SRobert Olsson 173482cfbb00SStephen Hemminger if (IS_LEAF(c)) { 173582cfbb00SStephen Hemminger prefetch(p->child[idx]); 17362373ce1cSRobert Olsson return (struct leaf *) c; 173719baf839SRobert Olsson } 173882cfbb00SStephen Hemminger 173982cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 174082cfbb00SStephen Hemminger p = (struct tnode *) c; 174182cfbb00SStephen Hemminger idx = 0; 174219baf839SRobert Olsson } 174382cfbb00SStephen Hemminger 174482cfbb00SStephen Hemminger /* Node empty, walk back up to parent */ 174582cfbb00SStephen Hemminger c = (struct node *) p; 174682cfbb00SStephen Hemminger } while ( (p = node_parent_rcu(c)) != NULL); 174782cfbb00SStephen Hemminger 174882cfbb00SStephen Hemminger return NULL; /* Root of trie */ 174982cfbb00SStephen Hemminger } 175082cfbb00SStephen Hemminger 175182cfbb00SStephen Hemminger static struct leaf *trie_firstleaf(struct trie *t) 175282cfbb00SStephen Hemminger { 175382cfbb00SStephen Hemminger struct tnode *n = (struct tnode *) rcu_dereference(t->trie); 175482cfbb00SStephen Hemminger 175582cfbb00SStephen Hemminger if (!n) 175682cfbb00SStephen Hemminger return NULL; 175782cfbb00SStephen Hemminger 175882cfbb00SStephen Hemminger if (IS_LEAF(n)) /* trie is just a leaf */ 175982cfbb00SStephen Hemminger return (struct leaf *) n; 176082cfbb00SStephen Hemminger 176182cfbb00SStephen Hemminger return leaf_walk_rcu(n, NULL); 176282cfbb00SStephen Hemminger } 176382cfbb00SStephen Hemminger 176482cfbb00SStephen Hemminger static struct leaf *trie_nextleaf(struct leaf *l) 176582cfbb00SStephen Hemminger { 176682cfbb00SStephen Hemminger struct node *c = (struct node *) l; 1767b902e573SJarek Poplawski struct tnode *p = node_parent_rcu(c); 176882cfbb00SStephen Hemminger 176982cfbb00SStephen Hemminger if (!p) 177082cfbb00SStephen Hemminger return NULL; /* trie with just one leaf */ 177182cfbb00SStephen Hemminger 177282cfbb00SStephen Hemminger return leaf_walk_rcu(p, c); 177319baf839SRobert Olsson } 177419baf839SRobert Olsson 177571d67e66SStephen Hemminger static struct leaf *trie_leafindex(struct trie *t, int index) 177671d67e66SStephen Hemminger { 177771d67e66SStephen Hemminger struct leaf *l = trie_firstleaf(t); 177871d67e66SStephen Hemminger 1779ec28cf73SStephen Hemminger while (l && index-- > 0) 178071d67e66SStephen Hemminger l = trie_nextleaf(l); 1781ec28cf73SStephen Hemminger 178271d67e66SStephen Hemminger return l; 178371d67e66SStephen Hemminger } 178471d67e66SStephen Hemminger 178571d67e66SStephen Hemminger 1786d562f1f8SRobert Olsson /* 1787d562f1f8SRobert Olsson * Caller must hold RTNL. 1788d562f1f8SRobert Olsson */ 178919baf839SRobert Olsson static int fn_trie_flush(struct fib_table *tb) 179019baf839SRobert Olsson { 179119baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 17929195bef7SStephen Hemminger struct leaf *l, *ll = NULL; 179382cfbb00SStephen Hemminger int found = 0; 179419baf839SRobert Olsson 179582cfbb00SStephen Hemminger for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { 1796ef3660ceSStephen Hemminger found += trie_flush_leaf(l); 179719baf839SRobert Olsson 179819baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 17999195bef7SStephen Hemminger trie_leaf_remove(t, ll); 180019baf839SRobert Olsson ll = l; 180119baf839SRobert Olsson } 180219baf839SRobert Olsson 180319baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 18049195bef7SStephen Hemminger trie_leaf_remove(t, ll); 180519baf839SRobert Olsson 18060c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 180719baf839SRobert Olsson return found; 180819baf839SRobert Olsson } 180919baf839SRobert Olsson 1810a07f5f50SStephen Hemminger static void fn_trie_select_default(struct fib_table *tb, 1811a07f5f50SStephen Hemminger const struct flowi *flp, 1812a07f5f50SStephen Hemminger struct fib_result *res) 181319baf839SRobert Olsson { 181419baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 181519baf839SRobert Olsson int order, last_idx; 181619baf839SRobert Olsson struct fib_info *fi = NULL; 181719baf839SRobert Olsson struct fib_info *last_resort; 181819baf839SRobert Olsson struct fib_alias *fa = NULL; 181919baf839SRobert Olsson struct list_head *fa_head; 182019baf839SRobert Olsson struct leaf *l; 182119baf839SRobert Olsson 182219baf839SRobert Olsson last_idx = -1; 182319baf839SRobert Olsson last_resort = NULL; 182419baf839SRobert Olsson order = -1; 182519baf839SRobert Olsson 18262373ce1cSRobert Olsson rcu_read_lock(); 182719baf839SRobert Olsson 182819baf839SRobert Olsson l = fib_find_node(t, 0); 182919baf839SRobert Olsson if (!l) 183019baf839SRobert Olsson goto out; 183119baf839SRobert Olsson 183219baf839SRobert Olsson fa_head = get_fa_head(l, 0); 183319baf839SRobert Olsson if (!fa_head) 183419baf839SRobert Olsson goto out; 183519baf839SRobert Olsson 183619baf839SRobert Olsson if (list_empty(fa_head)) 183719baf839SRobert Olsson goto out; 183819baf839SRobert Olsson 18392373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fa_head, fa_list) { 184019baf839SRobert Olsson struct fib_info *next_fi = fa->fa_info; 184119baf839SRobert Olsson 184219baf839SRobert Olsson if (fa->fa_scope != res->scope || 184319baf839SRobert Olsson fa->fa_type != RTN_UNICAST) 184419baf839SRobert Olsson continue; 184519baf839SRobert Olsson 184619baf839SRobert Olsson if (next_fi->fib_priority > res->fi->fib_priority) 184719baf839SRobert Olsson break; 184819baf839SRobert Olsson if (!next_fi->fib_nh[0].nh_gw || 184919baf839SRobert Olsson next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 185019baf839SRobert Olsson continue; 185119baf839SRobert Olsson fa->fa_state |= FA_S_ACCESSED; 185219baf839SRobert Olsson 185319baf839SRobert Olsson if (fi == NULL) { 185419baf839SRobert Olsson if (next_fi != res->fi) 185519baf839SRobert Olsson break; 185619baf839SRobert Olsson } else if (!fib_detect_death(fi, order, &last_resort, 1857971b893eSDenis V. Lunev &last_idx, tb->tb_default)) { 1858a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1859971b893eSDenis V. Lunev tb->tb_default = order; 186019baf839SRobert Olsson goto out; 186119baf839SRobert Olsson } 186219baf839SRobert Olsson fi = next_fi; 186319baf839SRobert Olsson order++; 186419baf839SRobert Olsson } 186519baf839SRobert Olsson if (order <= 0 || fi == NULL) { 1866971b893eSDenis V. Lunev tb->tb_default = -1; 186719baf839SRobert Olsson goto out; 186819baf839SRobert Olsson } 186919baf839SRobert Olsson 1870971b893eSDenis V. Lunev if (!fib_detect_death(fi, order, &last_resort, &last_idx, 1871971b893eSDenis V. Lunev tb->tb_default)) { 1872a2bbe682SDenis V. Lunev fib_result_assign(res, fi); 1873971b893eSDenis V. Lunev tb->tb_default = order; 187419baf839SRobert Olsson goto out; 187519baf839SRobert Olsson } 1876a2bbe682SDenis V. Lunev if (last_idx >= 0) 1877a2bbe682SDenis V. Lunev fib_result_assign(res, last_resort); 1878971b893eSDenis V. Lunev tb->tb_default = last_idx; 1879971b893eSDenis V. Lunev out: 18802373ce1cSRobert Olsson rcu_read_unlock(); 188119baf839SRobert Olsson } 188219baf839SRobert Olsson 1883a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1884a07f5f50SStephen Hemminger struct fib_table *tb, 188519baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 188619baf839SRobert Olsson { 188719baf839SRobert Olsson int i, s_i; 188819baf839SRobert Olsson struct fib_alias *fa; 188932ab5f80SAl Viro __be32 xkey = htonl(key); 189019baf839SRobert Olsson 189171d67e66SStephen Hemminger s_i = cb->args[5]; 189219baf839SRobert Olsson i = 0; 189319baf839SRobert Olsson 18942373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 18952373ce1cSRobert Olsson 18962373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 189719baf839SRobert Olsson if (i < s_i) { 189819baf839SRobert Olsson i++; 189919baf839SRobert Olsson continue; 190019baf839SRobert Olsson } 190119baf839SRobert Olsson 190219baf839SRobert Olsson if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 190319baf839SRobert Olsson cb->nlh->nlmsg_seq, 190419baf839SRobert Olsson RTM_NEWROUTE, 190519baf839SRobert Olsson tb->tb_id, 190619baf839SRobert Olsson fa->fa_type, 190719baf839SRobert Olsson fa->fa_scope, 1908be403ea1SThomas Graf xkey, 190919baf839SRobert Olsson plen, 191019baf839SRobert Olsson fa->fa_tos, 191164347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 191271d67e66SStephen Hemminger cb->args[5] = i; 191319baf839SRobert Olsson return -1; 191419baf839SRobert Olsson } 191519baf839SRobert Olsson i++; 191619baf839SRobert Olsson } 191771d67e66SStephen Hemminger cb->args[5] = i; 191819baf839SRobert Olsson return skb->len; 191919baf839SRobert Olsson } 192019baf839SRobert Olsson 1921a88ee229SStephen Hemminger static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, 1922a07f5f50SStephen Hemminger struct sk_buff *skb, struct netlink_callback *cb) 192319baf839SRobert Olsson { 1924a88ee229SStephen Hemminger struct leaf_info *li; 1925a88ee229SStephen Hemminger struct hlist_node *node; 1926a88ee229SStephen Hemminger int i, s_i; 192791b9a277SOlof Johansson 192871d67e66SStephen Hemminger s_i = cb->args[4]; 1929a88ee229SStephen Hemminger i = 0; 193019baf839SRobert Olsson 1931a88ee229SStephen Hemminger /* rcu_read_lock is hold by caller */ 1932a88ee229SStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 1933a88ee229SStephen Hemminger if (i < s_i) { 1934a88ee229SStephen Hemminger i++; 193519baf839SRobert Olsson continue; 1936a88ee229SStephen Hemminger } 193719baf839SRobert Olsson 1938a88ee229SStephen Hemminger if (i > s_i) 193971d67e66SStephen Hemminger cb->args[5] = 0; 194019baf839SRobert Olsson 1941a88ee229SStephen Hemminger if (list_empty(&li->falh)) 194219baf839SRobert Olsson continue; 194319baf839SRobert Olsson 1944a88ee229SStephen Hemminger if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { 194571d67e66SStephen Hemminger cb->args[4] = i; 194619baf839SRobert Olsson return -1; 194719baf839SRobert Olsson } 1948a88ee229SStephen Hemminger i++; 194919baf839SRobert Olsson } 1950a88ee229SStephen Hemminger 195171d67e66SStephen Hemminger cb->args[4] = i; 195219baf839SRobert Olsson return skb->len; 195319baf839SRobert Olsson } 195419baf839SRobert Olsson 1955a07f5f50SStephen Hemminger static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, 1956a07f5f50SStephen Hemminger struct netlink_callback *cb) 195719baf839SRobert Olsson { 1958a88ee229SStephen Hemminger struct leaf *l; 195919baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 1960d5ce8a0eSStephen Hemminger t_key key = cb->args[2]; 196171d67e66SStephen Hemminger int count = cb->args[3]; 196219baf839SRobert Olsson 19632373ce1cSRobert Olsson rcu_read_lock(); 1964d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1965d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1966d5ce8a0eSStephen Hemminger */ 196771d67e66SStephen Hemminger if (count == 0) 1968d5ce8a0eSStephen Hemminger l = trie_firstleaf(t); 1969d5ce8a0eSStephen Hemminger else { 197071d67e66SStephen Hemminger /* Normally, continue from last key, but if that is missing 197171d67e66SStephen Hemminger * fallback to using slow rescan 1972d5ce8a0eSStephen Hemminger */ 197371d67e66SStephen Hemminger l = fib_find_node(t, key); 197471d67e66SStephen Hemminger if (!l) 197571d67e66SStephen Hemminger l = trie_leafindex(t, count); 197619baf839SRobert Olsson } 1977a88ee229SStephen Hemminger 1978d5ce8a0eSStephen Hemminger while (l) { 1979d5ce8a0eSStephen Hemminger cb->args[2] = l->key; 1980a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 198171d67e66SStephen Hemminger cb->args[3] = count; 19822373ce1cSRobert Olsson rcu_read_unlock(); 198319baf839SRobert Olsson return -1; 198419baf839SRobert Olsson } 1985d5ce8a0eSStephen Hemminger 198671d67e66SStephen Hemminger ++count; 1987d5ce8a0eSStephen Hemminger l = trie_nextleaf(l); 198871d67e66SStephen Hemminger memset(&cb->args[4], 0, 198971d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 1990a88ee229SStephen Hemminger } 199171d67e66SStephen Hemminger cb->args[3] = count; 1992a88ee229SStephen Hemminger rcu_read_unlock(); 1993a88ee229SStephen Hemminger 1994a88ee229SStephen Hemminger return skb->len; 1995a88ee229SStephen Hemminger } 199619baf839SRobert Olsson 19977f9b8052SStephen Hemminger void __init fib_hash_init(void) 19987f9b8052SStephen Hemminger { 1999a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 2000a07f5f50SStephen Hemminger sizeof(struct fib_alias), 2001bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 2002bc3c8c1eSStephen Hemminger 2003bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 2004bc3c8c1eSStephen Hemminger max(sizeof(struct leaf), 2005bc3c8c1eSStephen Hemminger sizeof(struct leaf_info)), 2006bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 20077f9b8052SStephen Hemminger } 200819baf839SRobert Olsson 20097f9b8052SStephen Hemminger 20107f9b8052SStephen Hemminger /* Fix more generic FIB names for init later */ 20117f9b8052SStephen Hemminger struct fib_table *fib_hash_table(u32 id) 201219baf839SRobert Olsson { 201319baf839SRobert Olsson struct fib_table *tb; 201419baf839SRobert Olsson struct trie *t; 201519baf839SRobert Olsson 201619baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 201719baf839SRobert Olsson GFP_KERNEL); 201819baf839SRobert Olsson if (tb == NULL) 201919baf839SRobert Olsson return NULL; 202019baf839SRobert Olsson 202119baf839SRobert Olsson tb->tb_id = id; 2022971b893eSDenis V. Lunev tb->tb_default = -1; 202319baf839SRobert Olsson tb->tb_lookup = fn_trie_lookup; 202419baf839SRobert Olsson tb->tb_insert = fn_trie_insert; 202519baf839SRobert Olsson tb->tb_delete = fn_trie_delete; 202619baf839SRobert Olsson tb->tb_flush = fn_trie_flush; 202719baf839SRobert Olsson tb->tb_select_default = fn_trie_select_default; 202819baf839SRobert Olsson tb->tb_dump = fn_trie_dump; 202919baf839SRobert Olsson 203019baf839SRobert Olsson t = (struct trie *) tb->tb_data; 2031c28a1cf4SStephen Hemminger memset(t, 0, sizeof(*t)); 203219baf839SRobert Olsson 203319baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 2034a07f5f50SStephen Hemminger pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION); 203519baf839SRobert Olsson 203619baf839SRobert Olsson return tb; 203719baf839SRobert Olsson } 203819baf839SRobert Olsson 2039cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2040cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2041cb7b593cSStephen Hemminger struct fib_trie_iter { 20421c340b2fSDenis V. Lunev struct seq_net_private p; 20433d3b2d25SStephen Hemminger struct fib_table *tb; 2044cb7b593cSStephen Hemminger struct tnode *tnode; 2045cb7b593cSStephen Hemminger unsigned index; 2046cb7b593cSStephen Hemminger unsigned depth; 2047cb7b593cSStephen Hemminger }; 204819baf839SRobert Olsson 2049cb7b593cSStephen Hemminger static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 205019baf839SRobert Olsson { 2051cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 2052cb7b593cSStephen Hemminger unsigned cindex = iter->index; 2053cb7b593cSStephen Hemminger struct tnode *p; 205419baf839SRobert Olsson 20556640e697SEric W. Biederman /* A single entry routing table */ 20566640e697SEric W. Biederman if (!tn) 20576640e697SEric W. Biederman return NULL; 20586640e697SEric W. Biederman 2059cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2060cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 2061cb7b593cSStephen Hemminger rescan: 2062cb7b593cSStephen Hemminger while (cindex < (1<<tn->bits)) { 2063b59cfbf7SEric Dumazet struct node *n = tnode_get_child_rcu(tn, cindex); 206419baf839SRobert Olsson 2065cb7b593cSStephen Hemminger if (n) { 206619baf839SRobert Olsson if (IS_LEAF(n)) { 2067cb7b593cSStephen Hemminger iter->tnode = tn; 2068cb7b593cSStephen Hemminger iter->index = cindex + 1; 206991b9a277SOlof Johansson } else { 2070cb7b593cSStephen Hemminger /* push down one level */ 2071cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2072cb7b593cSStephen Hemminger iter->index = 0; 2073cb7b593cSStephen Hemminger ++iter->depth; 207419baf839SRobert Olsson } 2075cb7b593cSStephen Hemminger return n; 207619baf839SRobert Olsson } 207719baf839SRobert Olsson 2078cb7b593cSStephen Hemminger ++cindex; 2079cb7b593cSStephen Hemminger } 2080cb7b593cSStephen Hemminger 2081cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 2082b59cfbf7SEric Dumazet p = node_parent_rcu((struct node *)tn); 2083cb7b593cSStephen Hemminger if (p) { 2084cb7b593cSStephen Hemminger cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2085cb7b593cSStephen Hemminger tn = p; 2086cb7b593cSStephen Hemminger --iter->depth; 2087cb7b593cSStephen Hemminger goto rescan; 2088cb7b593cSStephen Hemminger } 2089cb7b593cSStephen Hemminger 2090cb7b593cSStephen Hemminger /* got root? */ 2091cb7b593cSStephen Hemminger return NULL; 2092cb7b593cSStephen Hemminger } 2093cb7b593cSStephen Hemminger 2094cb7b593cSStephen Hemminger static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2095cb7b593cSStephen Hemminger struct trie *t) 2096cb7b593cSStephen Hemminger { 20975ddf0eb2SRobert Olsson struct node *n; 20985ddf0eb2SRobert Olsson 20995ddf0eb2SRobert Olsson if (!t) 21005ddf0eb2SRobert Olsson return NULL; 21015ddf0eb2SRobert Olsson 21025ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 21033d3b2d25SStephen Hemminger if (!n) 21045ddf0eb2SRobert Olsson return NULL; 2105cb7b593cSStephen Hemminger 21066640e697SEric W. Biederman if (IS_TNODE(n)) { 2107cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2108cb7b593cSStephen Hemminger iter->index = 0; 21091d25cd6cSRobert Olsson iter->depth = 1; 21106640e697SEric W. Biederman } else { 21116640e697SEric W. Biederman iter->tnode = NULL; 21126640e697SEric W. Biederman iter->index = 0; 21136640e697SEric W. Biederman iter->depth = 0; 21146640e697SEric W. Biederman } 21153d3b2d25SStephen Hemminger 2116cb7b593cSStephen Hemminger return n; 2117cb7b593cSStephen Hemminger } 2118cb7b593cSStephen Hemminger 2119cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 212019baf839SRobert Olsson { 21212373ce1cSRobert Olsson struct node *n; 2122cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2123cb7b593cSStephen Hemminger 2124cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 212519baf839SRobert Olsson 21262373ce1cSRobert Olsson rcu_read_lock(); 21273d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2128cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 212993672292SStephen Hemminger struct leaf *l = (struct leaf *)n; 213093672292SStephen Hemminger struct leaf_info *li; 213193672292SStephen Hemminger struct hlist_node *tmp; 213293672292SStephen Hemminger 213319baf839SRobert Olsson s->leaves++; 2134cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2135cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2136cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 213793672292SStephen Hemminger 213893672292SStephen Hemminger hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) 213993672292SStephen Hemminger ++s->prefixes; 214091b9a277SOlof Johansson } else { 2141cb7b593cSStephen Hemminger const struct tnode *tn = (const struct tnode *) n; 2142cb7b593cSStephen Hemminger int i; 214319baf839SRobert Olsson 214419baf839SRobert Olsson s->tnodes++; 214506ef921dSRobert Olsson if (tn->bits < MAX_STAT_DEPTH) 214619baf839SRobert Olsson s->nodesizes[tn->bits]++; 214706ef921dSRobert Olsson 2148cb7b593cSStephen Hemminger for (i = 0; i < (1<<tn->bits); i++) 2149cb7b593cSStephen Hemminger if (!tn->child[i]) 215019baf839SRobert Olsson s->nullpointers++; 215119baf839SRobert Olsson } 215219baf839SRobert Olsson } 21532373ce1cSRobert Olsson rcu_read_unlock(); 215419baf839SRobert Olsson } 215519baf839SRobert Olsson 215619baf839SRobert Olsson /* 215719baf839SRobert Olsson * This outputs /proc/net/fib_triestats 215819baf839SRobert Olsson */ 2159cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 216019baf839SRobert Olsson { 2161cb7b593cSStephen Hemminger unsigned i, max, pointers, bytes, avdepth; 216219baf839SRobert Olsson 216319baf839SRobert Olsson if (stat->leaves) 216419baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 216519baf839SRobert Olsson else 216619baf839SRobert Olsson avdepth = 0; 216719baf839SRobert Olsson 2168a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2169a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2170cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2171cb7b593cSStephen Hemminger 2172cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2173cb7b593cSStephen Hemminger bytes = sizeof(struct leaf) * stat->leaves; 217493672292SStephen Hemminger 217593672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 217693672292SStephen Hemminger bytes += sizeof(struct leaf_info) * stat->prefixes; 217793672292SStephen Hemminger 2178187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 217919baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 218019baf839SRobert Olsson 218106ef921dSRobert Olsson max = MAX_STAT_DEPTH; 218206ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 218319baf839SRobert Olsson max--; 218419baf839SRobert Olsson 2185cb7b593cSStephen Hemminger pointers = 0; 218619baf839SRobert Olsson for (i = 1; i <= max; i++) 218719baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2188187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 218919baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 219019baf839SRobert Olsson } 2191cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2192187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2193cb7b593cSStephen Hemminger 219419baf839SRobert Olsson bytes += sizeof(struct node *) * pointers; 2195187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2196187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 219766a2f7fdSStephen Hemminger } 219819baf839SRobert Olsson 219919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 220066a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 220166a2f7fdSStephen Hemminger const struct trie_use_stats *stats) 220266a2f7fdSStephen Hemminger { 220366a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 220466a2f7fdSStephen Hemminger seq_printf(seq, "gets = %u\n", stats->gets); 220566a2f7fdSStephen Hemminger seq_printf(seq, "backtracks = %u\n", stats->backtrack); 2206a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 2207a07f5f50SStephen Hemminger stats->semantic_match_passed); 2208a07f5f50SStephen Hemminger seq_printf(seq, "semantic match miss = %u\n", 2209a07f5f50SStephen Hemminger stats->semantic_match_miss); 221066a2f7fdSStephen Hemminger seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); 2211a07f5f50SStephen Hemminger seq_printf(seq, "skipped node resize = %u\n\n", 2212a07f5f50SStephen Hemminger stats->resize_node_skipped); 221319baf839SRobert Olsson } 221466a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 221566a2f7fdSStephen Hemminger 22163d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2217d717a9a6SStephen Hemminger { 22183d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 22193d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 22203d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 22213d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 22223d3b2d25SStephen Hemminger else 22233d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2224d717a9a6SStephen Hemminger } 222519baf839SRobert Olsson 22263d3b2d25SStephen Hemminger 222719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 222819baf839SRobert Olsson { 22291c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 22303d3b2d25SStephen Hemminger unsigned int h; 2231877a9bffSEric W. Biederman 2232d717a9a6SStephen Hemminger seq_printf(seq, 2233a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2234a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 223519baf839SRobert Olsson sizeof(struct leaf), sizeof(struct tnode)); 223619baf839SRobert Olsson 22373d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22383d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22393d3b2d25SStephen Hemminger struct hlist_node *node; 22403d3b2d25SStephen Hemminger struct fib_table *tb; 2241cb7b593cSStephen Hemminger 22423d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 22433d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 22443d3b2d25SStephen Hemminger struct trie_stat stat; 22453d3b2d25SStephen Hemminger 22463d3b2d25SStephen Hemminger if (!t) 22473d3b2d25SStephen Hemminger continue; 22483d3b2d25SStephen Hemminger 22493d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22503d3b2d25SStephen Hemminger 22513d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22523d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22533d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22543d3b2d25SStephen Hemminger trie_show_usage(seq, &t->stats); 22553d3b2d25SStephen Hemminger #endif 22563d3b2d25SStephen Hemminger } 22573d3b2d25SStephen Hemminger } 2258cb7b593cSStephen Hemminger 225919baf839SRobert Olsson return 0; 226019baf839SRobert Olsson } 226119baf839SRobert Olsson 226219baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 226319baf839SRobert Olsson { 2264de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 22651c340b2fSDenis V. Lunev } 22661c340b2fSDenis V. Lunev 22679a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 226819baf839SRobert Olsson .owner = THIS_MODULE, 226919baf839SRobert Olsson .open = fib_triestat_seq_open, 227019baf839SRobert Olsson .read = seq_read, 227119baf839SRobert Olsson .llseek = seq_lseek, 2272b6fcbdb4SPavel Emelyanov .release = single_release_net, 227319baf839SRobert Olsson }; 227419baf839SRobert Olsson 22751218854aSYOSHIFUJI Hideaki static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 227619baf839SRobert Olsson { 22771218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 22781218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2279cb7b593cSStephen Hemminger loff_t idx = 0; 22803d3b2d25SStephen Hemminger unsigned int h; 22813d3b2d25SStephen Hemminger 22823d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22833d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22843d3b2d25SStephen Hemminger struct hlist_node *node; 22853d3b2d25SStephen Hemminger struct fib_table *tb; 22863d3b2d25SStephen Hemminger 22873d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 2288cb7b593cSStephen Hemminger struct node *n; 2289cb7b593cSStephen Hemminger 22903d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 22913d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 22923d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 22933d3b2d25SStephen Hemminger if (pos == idx++) { 22943d3b2d25SStephen Hemminger iter->tb = tb; 2295cb7b593cSStephen Hemminger return n; 229619baf839SRobert Olsson } 22973d3b2d25SStephen Hemminger } 22983d3b2d25SStephen Hemminger } 229919baf839SRobert Olsson 230019baf839SRobert Olsson return NULL; 230119baf839SRobert Olsson } 230219baf839SRobert Olsson 230319baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2304c95aaf9aSStephen Hemminger __acquires(RCU) 230519baf839SRobert Olsson { 2306cb7b593cSStephen Hemminger rcu_read_lock(); 23071218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 230819baf839SRobert Olsson } 230919baf839SRobert Olsson 231019baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 231119baf839SRobert Olsson { 2312cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 23131218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 23143d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 23153d3b2d25SStephen Hemminger struct hlist_node *tb_node; 23163d3b2d25SStephen Hemminger unsigned int h; 23173d3b2d25SStephen Hemminger struct node *n; 2318cb7b593cSStephen Hemminger 231919baf839SRobert Olsson ++*pos; 23203d3b2d25SStephen Hemminger /* next node in same table */ 23213d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 23223d3b2d25SStephen Hemminger if (n) 23233d3b2d25SStephen Hemminger return n; 232491b9a277SOlof Johansson 23253d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 23263d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 23273d3b2d25SStephen Hemminger while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 23283d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 23293d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23303d3b2d25SStephen Hemminger if (n) 23313d3b2d25SStephen Hemminger goto found; 23323d3b2d25SStephen Hemminger } 2333cb7b593cSStephen Hemminger 23343d3b2d25SStephen Hemminger /* new hash chain */ 23353d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 23363d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 23373d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { 23383d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23393d3b2d25SStephen Hemminger if (n) 23403d3b2d25SStephen Hemminger goto found; 23413d3b2d25SStephen Hemminger } 23423d3b2d25SStephen Hemminger } 2343cb7b593cSStephen Hemminger return NULL; 23443d3b2d25SStephen Hemminger 23453d3b2d25SStephen Hemminger found: 23463d3b2d25SStephen Hemminger iter->tb = tb; 23473d3b2d25SStephen Hemminger return n; 234819baf839SRobert Olsson } 234919baf839SRobert Olsson 235019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2351c95aaf9aSStephen Hemminger __releases(RCU) 235219baf839SRobert Olsson { 2353cb7b593cSStephen Hemminger rcu_read_unlock(); 235419baf839SRobert Olsson } 235519baf839SRobert Olsson 2356cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2357cb7b593cSStephen Hemminger { 2358cb7b593cSStephen Hemminger while (n-- > 0) seq_puts(seq, " "); 2359cb7b593cSStephen Hemminger } 236019baf839SRobert Olsson 236128d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2362cb7b593cSStephen Hemminger { 2363cb7b593cSStephen Hemminger switch (s) { 2364cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2365cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2366cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2367cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2368cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2369cb7b593cSStephen Hemminger default: 237028d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2371cb7b593cSStephen Hemminger return buf; 2372cb7b593cSStephen Hemminger } 2373cb7b593cSStephen Hemminger } 2374cb7b593cSStephen Hemminger 237536cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2376cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2377cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2378cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2379cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2380cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2381cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2382cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2383cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2384cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2385cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2386cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2387cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2388cb7b593cSStephen Hemminger }; 2389cb7b593cSStephen Hemminger 239028d36e37SEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2391cb7b593cSStephen Hemminger { 2392cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2393cb7b593cSStephen Hemminger return rtn_type_names[t]; 239428d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2395cb7b593cSStephen Hemminger return buf; 2396cb7b593cSStephen Hemminger } 2397cb7b593cSStephen Hemminger 2398cb7b593cSStephen Hemminger /* Pretty print the trie */ 239919baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 240019baf839SRobert Olsson { 2401cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2402cb7b593cSStephen Hemminger struct node *n = v; 240319baf839SRobert Olsson 24043d3b2d25SStephen Hemminger if (!node_parent_rcu(n)) 24053d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2406095b8501SRobert Olsson 2407095b8501SRobert Olsson if (IS_TNODE(n)) { 2408095b8501SRobert Olsson struct tnode *tn = (struct tnode *) n; 2409ab66b4a7SStephen Hemminger __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2410095b8501SRobert Olsson 24111d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2412673d57e7SHarvey Harrison seq_printf(seq, " +-- %pI4/%d %d %d %d\n", 2413673d57e7SHarvey Harrison &prf, tn->pos, tn->bits, tn->full_children, 24141d25cd6cSRobert Olsson tn->empty_children); 24151d25cd6cSRobert Olsson 2416cb7b593cSStephen Hemminger } else { 2417cb7b593cSStephen Hemminger struct leaf *l = (struct leaf *) n; 24181328042eSStephen Hemminger struct leaf_info *li; 24191328042eSStephen Hemminger struct hlist_node *node; 242032ab5f80SAl Viro __be32 val = htonl(l->key); 2421cb7b593cSStephen Hemminger 2422cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2423673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 242428d36e37SEric Dumazet 24251328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2426cb7b593cSStephen Hemminger struct fib_alias *fa; 242728d36e37SEric Dumazet 2428cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 242928d36e37SEric Dumazet char buf1[32], buf2[32]; 243028d36e37SEric Dumazet 2431cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 24321328042eSStephen Hemminger seq_printf(seq, " /%d %s %s", li->plen, 243328d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 243428d36e37SEric Dumazet fa->fa_scope), 243528d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 243628d36e37SEric Dumazet fa->fa_type)); 2437cb7b593cSStephen Hemminger if (fa->fa_tos) 2438b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2439cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2440cb7b593cSStephen Hemminger } 2441cb7b593cSStephen Hemminger } 2442cb7b593cSStephen Hemminger } 244319baf839SRobert Olsson 244419baf839SRobert Olsson return 0; 244519baf839SRobert Olsson } 244619baf839SRobert Olsson 2447f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 244819baf839SRobert Olsson .start = fib_trie_seq_start, 244919baf839SRobert Olsson .next = fib_trie_seq_next, 245019baf839SRobert Olsson .stop = fib_trie_seq_stop, 245119baf839SRobert Olsson .show = fib_trie_seq_show, 245219baf839SRobert Olsson }; 245319baf839SRobert Olsson 245419baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 245519baf839SRobert Olsson { 24561c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2457cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 245819baf839SRobert Olsson } 245919baf839SRobert Olsson 24609a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 246119baf839SRobert Olsson .owner = THIS_MODULE, 246219baf839SRobert Olsson .open = fib_trie_seq_open, 246319baf839SRobert Olsson .read = seq_read, 246419baf839SRobert Olsson .llseek = seq_lseek, 24651c340b2fSDenis V. Lunev .release = seq_release_net, 246619baf839SRobert Olsson }; 246719baf839SRobert Olsson 24688315f5d8SStephen Hemminger struct fib_route_iter { 24698315f5d8SStephen Hemminger struct seq_net_private p; 24708315f5d8SStephen Hemminger struct trie *main_trie; 24718315f5d8SStephen Hemminger loff_t pos; 24728315f5d8SStephen Hemminger t_key key; 24738315f5d8SStephen Hemminger }; 24748315f5d8SStephen Hemminger 24758315f5d8SStephen Hemminger static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) 24768315f5d8SStephen Hemminger { 24778315f5d8SStephen Hemminger struct leaf *l = NULL; 24788315f5d8SStephen Hemminger struct trie *t = iter->main_trie; 24798315f5d8SStephen Hemminger 24808315f5d8SStephen Hemminger /* use cache location of last found key */ 24818315f5d8SStephen Hemminger if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) 24828315f5d8SStephen Hemminger pos -= iter->pos; 24838315f5d8SStephen Hemminger else { 24848315f5d8SStephen Hemminger iter->pos = 0; 24858315f5d8SStephen Hemminger l = trie_firstleaf(t); 24868315f5d8SStephen Hemminger } 24878315f5d8SStephen Hemminger 24888315f5d8SStephen Hemminger while (l && pos-- > 0) { 24898315f5d8SStephen Hemminger iter->pos++; 24908315f5d8SStephen Hemminger l = trie_nextleaf(l); 24918315f5d8SStephen Hemminger } 24928315f5d8SStephen Hemminger 24938315f5d8SStephen Hemminger if (l) 24948315f5d8SStephen Hemminger iter->key = pos; /* remember it */ 24958315f5d8SStephen Hemminger else 24968315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 24978315f5d8SStephen Hemminger 24988315f5d8SStephen Hemminger return l; 24998315f5d8SStephen Hemminger } 25008315f5d8SStephen Hemminger 25018315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 25028315f5d8SStephen Hemminger __acquires(RCU) 25038315f5d8SStephen Hemminger { 25048315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 25058315f5d8SStephen Hemminger struct fib_table *tb; 25068315f5d8SStephen Hemminger 25078315f5d8SStephen Hemminger rcu_read_lock(); 25081218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 25098315f5d8SStephen Hemminger if (!tb) 25108315f5d8SStephen Hemminger return NULL; 25118315f5d8SStephen Hemminger 25128315f5d8SStephen Hemminger iter->main_trie = (struct trie *) tb->tb_data; 25138315f5d8SStephen Hemminger if (*pos == 0) 25148315f5d8SStephen Hemminger return SEQ_START_TOKEN; 25158315f5d8SStephen Hemminger else 25168315f5d8SStephen Hemminger return fib_route_get_idx(iter, *pos - 1); 25178315f5d8SStephen Hemminger } 25188315f5d8SStephen Hemminger 25198315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25208315f5d8SStephen Hemminger { 25218315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 25228315f5d8SStephen Hemminger struct leaf *l = v; 25238315f5d8SStephen Hemminger 25248315f5d8SStephen Hemminger ++*pos; 25258315f5d8SStephen Hemminger if (v == SEQ_START_TOKEN) { 25268315f5d8SStephen Hemminger iter->pos = 0; 25278315f5d8SStephen Hemminger l = trie_firstleaf(iter->main_trie); 25288315f5d8SStephen Hemminger } else { 25298315f5d8SStephen Hemminger iter->pos++; 25308315f5d8SStephen Hemminger l = trie_nextleaf(l); 25318315f5d8SStephen Hemminger } 25328315f5d8SStephen Hemminger 25338315f5d8SStephen Hemminger if (l) 25348315f5d8SStephen Hemminger iter->key = l->key; 25358315f5d8SStephen Hemminger else 25368315f5d8SStephen Hemminger iter->pos = 0; 25378315f5d8SStephen Hemminger return l; 25388315f5d8SStephen Hemminger } 25398315f5d8SStephen Hemminger 25408315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 25418315f5d8SStephen Hemminger __releases(RCU) 25428315f5d8SStephen Hemminger { 25438315f5d8SStephen Hemminger rcu_read_unlock(); 25448315f5d8SStephen Hemminger } 25458315f5d8SStephen Hemminger 254632ab5f80SAl Viro static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2547cb7b593cSStephen Hemminger { 2548cb7b593cSStephen Hemminger static unsigned type2flags[RTN_MAX + 1] = { 2549cb7b593cSStephen Hemminger [7] = RTF_REJECT, [8] = RTF_REJECT, 2550cb7b593cSStephen Hemminger }; 2551cb7b593cSStephen Hemminger unsigned flags = type2flags[type]; 2552cb7b593cSStephen Hemminger 2553cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2554cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 255532ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2556cb7b593cSStephen Hemminger flags |= RTF_HOST; 2557cb7b593cSStephen Hemminger flags |= RTF_UP; 2558cb7b593cSStephen Hemminger return flags; 2559cb7b593cSStephen Hemminger } 2560cb7b593cSStephen Hemminger 2561cb7b593cSStephen Hemminger /* 2562cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2563cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2564cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2565cb7b593cSStephen Hemminger * legacy utilities 2566cb7b593cSStephen Hemminger */ 2567cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2568cb7b593cSStephen Hemminger { 2569cb7b593cSStephen Hemminger struct leaf *l = v; 25701328042eSStephen Hemminger struct leaf_info *li; 25711328042eSStephen Hemminger struct hlist_node *node; 2572cb7b593cSStephen Hemminger 2573cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2574cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2575cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2576cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2577cb7b593cSStephen Hemminger return 0; 2578cb7b593cSStephen Hemminger } 2579cb7b593cSStephen Hemminger 25801328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2581cb7b593cSStephen Hemminger struct fib_alias *fa; 258232ab5f80SAl Viro __be32 mask, prefix; 2583cb7b593cSStephen Hemminger 2584cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2585cb7b593cSStephen Hemminger prefix = htonl(l->key); 2586cb7b593cSStephen Hemminger 2587cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 25881371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2589cb7b593cSStephen Hemminger unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 25905e659e4cSPavel Emelyanov int len; 2591cb7b593cSStephen Hemminger 2592cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2593cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2594cb7b593cSStephen Hemminger continue; 2595cb7b593cSStephen Hemminger 2596cb7b593cSStephen Hemminger if (fi) 25975e659e4cSPavel Emelyanov seq_printf(seq, 25985e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 25995e659e4cSPavel Emelyanov "%d\t%08X\t%d\t%u\t%u%n", 2600cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2601cb7b593cSStephen Hemminger prefix, 2602cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2603cb7b593cSStephen Hemminger fi->fib_priority, 2604cb7b593cSStephen Hemminger mask, 2605a07f5f50SStephen Hemminger (fi->fib_advmss ? 2606a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2607cb7b593cSStephen Hemminger fi->fib_window, 26085e659e4cSPavel Emelyanov fi->fib_rtt >> 3, &len); 2609cb7b593cSStephen Hemminger else 26105e659e4cSPavel Emelyanov seq_printf(seq, 26115e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 26125e659e4cSPavel Emelyanov "%d\t%08X\t%d\t%u\t%u%n", 2613cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 26145e659e4cSPavel Emelyanov mask, 0, 0, 0, &len); 2615cb7b593cSStephen Hemminger 26165e659e4cSPavel Emelyanov seq_printf(seq, "%*s\n", 127 - len, ""); 2617cb7b593cSStephen Hemminger } 2618cb7b593cSStephen Hemminger } 2619cb7b593cSStephen Hemminger 2620cb7b593cSStephen Hemminger return 0; 2621cb7b593cSStephen Hemminger } 2622cb7b593cSStephen Hemminger 2623f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 26248315f5d8SStephen Hemminger .start = fib_route_seq_start, 26258315f5d8SStephen Hemminger .next = fib_route_seq_next, 26268315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2627cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2628cb7b593cSStephen Hemminger }; 2629cb7b593cSStephen Hemminger 2630cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2631cb7b593cSStephen Hemminger { 26321c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 26338315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2634cb7b593cSStephen Hemminger } 2635cb7b593cSStephen Hemminger 26369a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2637cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2638cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2639cb7b593cSStephen Hemminger .read = seq_read, 2640cb7b593cSStephen Hemminger .llseek = seq_lseek, 26411c340b2fSDenis V. Lunev .release = seq_release_net, 2642cb7b593cSStephen Hemminger }; 2643cb7b593cSStephen Hemminger 264461a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 264519baf839SRobert Olsson { 264661a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2647cb7b593cSStephen Hemminger goto out1; 2648cb7b593cSStephen Hemminger 264961a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, 265061a02653SDenis V. Lunev &fib_triestat_fops)) 2651cb7b593cSStephen Hemminger goto out2; 2652cb7b593cSStephen Hemminger 265361a02653SDenis V. Lunev if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) 2654cb7b593cSStephen Hemminger goto out3; 2655cb7b593cSStephen Hemminger 265619baf839SRobert Olsson return 0; 2657cb7b593cSStephen Hemminger 2658cb7b593cSStephen Hemminger out3: 265961a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 2660cb7b593cSStephen Hemminger out2: 266161a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 2662cb7b593cSStephen Hemminger out1: 2663cb7b593cSStephen Hemminger return -ENOMEM; 266419baf839SRobert Olsson } 266519baf839SRobert Olsson 266661a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 266719baf839SRobert Olsson { 266861a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 266961a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 267061a02653SDenis V. Lunev proc_net_remove(net, "route"); 267119baf839SRobert Olsson } 267219baf839SRobert Olsson 267319baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2674