119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1519baf839SRobert Olsson * This work is based on the LPC-trie which is originally descibed in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 5419baf839SRobert Olsson #include <asm/system.h> 551977f032SJiri Slaby #include <linux/bitops.h> 5619baf839SRobert Olsson #include <linux/types.h> 5719baf839SRobert Olsson #include <linux/kernel.h> 5819baf839SRobert Olsson #include <linux/mm.h> 5919baf839SRobert Olsson #include <linux/string.h> 6019baf839SRobert Olsson #include <linux/socket.h> 6119baf839SRobert Olsson #include <linux/sockios.h> 6219baf839SRobert Olsson #include <linux/errno.h> 6319baf839SRobert Olsson #include <linux/in.h> 6419baf839SRobert Olsson #include <linux/inet.h> 65cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6619baf839SRobert Olsson #include <linux/netdevice.h> 6719baf839SRobert Olsson #include <linux/if_arp.h> 6819baf839SRobert Olsson #include <linux/proc_fs.h> 692373ce1cSRobert Olsson #include <linux/rcupdate.h> 7019baf839SRobert Olsson #include <linux/skbuff.h> 7119baf839SRobert Olsson #include <linux/netlink.h> 7219baf839SRobert Olsson #include <linux/init.h> 7319baf839SRobert Olsson #include <linux/list.h> 745a0e3ad6STejun Heo #include <linux/slab.h> 75457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7619baf839SRobert Olsson #include <net/ip.h> 7719baf839SRobert Olsson #include <net/protocol.h> 7819baf839SRobert Olsson #include <net/route.h> 7919baf839SRobert Olsson #include <net/tcp.h> 8019baf839SRobert Olsson #include <net/sock.h> 8119baf839SRobert Olsson #include <net/ip_fib.h> 8219baf839SRobert Olsson #include "fib_lookup.h" 8319baf839SRobert Olsson 8406ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 8519baf839SRobert Olsson 8619baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 8719baf839SRobert Olsson 8819baf839SRobert Olsson typedef unsigned int t_key; 8919baf839SRobert Olsson 9019baf839SRobert Olsson #define T_TNODE 0 9119baf839SRobert Olsson #define T_LEAF 1 9219baf839SRobert Olsson #define NODE_TYPE_MASK 0x1UL 932373ce1cSRobert Olsson #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) 942373ce1cSRobert Olsson 9591b9a277SOlof Johansson #define IS_TNODE(n) (!(n->parent & T_LEAF)) 9691b9a277SOlof Johansson #define IS_LEAF(n) (n->parent & T_LEAF) 9719baf839SRobert Olsson 98b299e4f0SDavid S. Miller struct rt_trie_node { 9991b9a277SOlof Johansson unsigned long parent; 1008d965444SEric Dumazet t_key key; 10119baf839SRobert Olsson }; 10219baf839SRobert Olsson 10319baf839SRobert Olsson struct leaf { 10491b9a277SOlof Johansson unsigned long parent; 1058d965444SEric Dumazet t_key key; 10619baf839SRobert Olsson struct hlist_head list; 1072373ce1cSRobert Olsson struct rcu_head rcu; 10819baf839SRobert Olsson }; 10919baf839SRobert Olsson 11019baf839SRobert Olsson struct leaf_info { 11119baf839SRobert Olsson struct hlist_node hlist; 1122373ce1cSRobert Olsson struct rcu_head rcu; 11319baf839SRobert Olsson int plen; 11419baf839SRobert Olsson struct list_head falh; 11519baf839SRobert Olsson }; 11619baf839SRobert Olsson 11719baf839SRobert Olsson struct tnode { 11891b9a277SOlof Johansson unsigned long parent; 1198d965444SEric Dumazet t_key key; 120112d8cfcSEric Dumazet unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 121112d8cfcSEric Dumazet unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 1228d965444SEric Dumazet unsigned int full_children; /* KEYLENGTH bits needed */ 1238d965444SEric Dumazet unsigned int empty_children; /* KEYLENGTH bits needed */ 12415be75cdSStephen Hemminger union { 1252373ce1cSRobert Olsson struct rcu_head rcu; 12615be75cdSStephen Hemminger struct work_struct work; 127e0f7cb8cSJarek Poplawski struct tnode *tnode_free; 12815be75cdSStephen Hemminger }; 129b299e4f0SDavid S. Miller struct rt_trie_node *child[0]; 13019baf839SRobert Olsson }; 13119baf839SRobert Olsson 13219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13319baf839SRobert Olsson struct trie_use_stats { 13419baf839SRobert Olsson unsigned int gets; 13519baf839SRobert Olsson unsigned int backtrack; 13619baf839SRobert Olsson unsigned int semantic_match_passed; 13719baf839SRobert Olsson unsigned int semantic_match_miss; 13819baf839SRobert Olsson unsigned int null_node_hit; 1392f36895aSRobert Olsson unsigned int resize_node_skipped; 14019baf839SRobert Olsson }; 14119baf839SRobert Olsson #endif 14219baf839SRobert Olsson 14319baf839SRobert Olsson struct trie_stat { 14419baf839SRobert Olsson unsigned int totdepth; 14519baf839SRobert Olsson unsigned int maxdepth; 14619baf839SRobert Olsson unsigned int tnodes; 14719baf839SRobert Olsson unsigned int leaves; 14819baf839SRobert Olsson unsigned int nullpointers; 14993672292SStephen Hemminger unsigned int prefixes; 15006ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 15119baf839SRobert Olsson }; 15219baf839SRobert Olsson 15319baf839SRobert Olsson struct trie { 154b299e4f0SDavid S. Miller struct rt_trie_node *trie; 15519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 15619baf839SRobert Olsson struct trie_use_stats stats; 15719baf839SRobert Olsson #endif 15819baf839SRobert Olsson }; 15919baf839SRobert Olsson 160b299e4f0SDavid S. Miller static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); 161b299e4f0SDavid S. Miller static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 162a07f5f50SStephen Hemminger int wasfull); 163b299e4f0SDavid S. Miller static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); 1642f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn); 1652f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn); 166e0f7cb8cSJarek Poplawski /* tnodes to free after resize(); protected by RTNL */ 167e0f7cb8cSJarek Poplawski static struct tnode *tnode_free_head; 168c3059477SJarek Poplawski static size_t tnode_free_size; 169c3059477SJarek Poplawski 170c3059477SJarek Poplawski /* 171c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 172c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 173c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 174c3059477SJarek Poplawski */ 175c3059477SJarek Poplawski static const int sync_pages = 128; 17619baf839SRobert Olsson 177e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 178bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 17919baf839SRobert Olsson 180b299e4f0SDavid S. Miller static inline struct tnode *node_parent(struct rt_trie_node *node) 18106801916SStephen Hemminger { 182b59cfbf7SEric Dumazet return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 183b59cfbf7SEric Dumazet } 18406801916SStephen Hemminger 185b299e4f0SDavid S. Miller static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) 186b59cfbf7SEric Dumazet { 187b59cfbf7SEric Dumazet struct tnode *ret = node_parent(node); 188b59cfbf7SEric Dumazet 189a034ee3cSEric Dumazet return rcu_dereference_rtnl(ret); 19006801916SStephen Hemminger } 19106801916SStephen Hemminger 1926440cc9eSStephen Hemminger /* Same as rcu_assign_pointer 1936440cc9eSStephen Hemminger * but that macro() assumes that value is a pointer. 1946440cc9eSStephen Hemminger */ 195b299e4f0SDavid S. Miller static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) 19606801916SStephen Hemminger { 1976440cc9eSStephen Hemminger smp_wmb(); 1986440cc9eSStephen Hemminger node->parent = (unsigned long)ptr | NODE_TYPE(node); 19906801916SStephen Hemminger } 2002373ce1cSRobert Olsson 201b299e4f0SDavid S. Miller static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) 20219baf839SRobert Olsson { 203b59cfbf7SEric Dumazet BUG_ON(i >= 1U << tn->bits); 20419baf839SRobert Olsson 205b59cfbf7SEric Dumazet return tn->child[i]; 206b59cfbf7SEric Dumazet } 207b59cfbf7SEric Dumazet 208b299e4f0SDavid S. Miller static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 209b59cfbf7SEric Dumazet { 210b299e4f0SDavid S. Miller struct rt_trie_node *ret = tnode_get_child(tn, i); 211b59cfbf7SEric Dumazet 212a034ee3cSEric Dumazet return rcu_dereference_rtnl(ret); 21319baf839SRobert Olsson } 21419baf839SRobert Olsson 215bb435b8dSStephen Hemmigner static inline int tnode_child_length(const struct tnode *tn) 21619baf839SRobert Olsson { 21719baf839SRobert Olsson return 1 << tn->bits; 21819baf839SRobert Olsson } 21919baf839SRobert Olsson 2203b004569SDavid S. Miller static inline t_key mask_pfx(t_key k, unsigned int l) 221ab66b4a7SStephen Hemminger { 222ab66b4a7SStephen Hemminger return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 223ab66b4a7SStephen Hemminger } 224ab66b4a7SStephen Hemminger 2253b004569SDavid S. Miller static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) 22619baf839SRobert Olsson { 22719baf839SRobert Olsson if (offset < KEYLENGTH) 22819baf839SRobert Olsson return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 22919baf839SRobert Olsson else 23019baf839SRobert Olsson return 0; 23119baf839SRobert Olsson } 23219baf839SRobert Olsson 23319baf839SRobert Olsson static inline int tkey_equals(t_key a, t_key b) 23419baf839SRobert Olsson { 23519baf839SRobert Olsson return a == b; 23619baf839SRobert Olsson } 23719baf839SRobert Olsson 23819baf839SRobert Olsson static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 23919baf839SRobert Olsson { 24019baf839SRobert Olsson if (bits == 0 || offset >= KEYLENGTH) 24119baf839SRobert Olsson return 1; 24219baf839SRobert Olsson bits = bits > KEYLENGTH ? KEYLENGTH : bits; 24319baf839SRobert Olsson return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 24419baf839SRobert Olsson } 24519baf839SRobert Olsson 24619baf839SRobert Olsson static inline int tkey_mismatch(t_key a, int offset, t_key b) 24719baf839SRobert Olsson { 24819baf839SRobert Olsson t_key diff = a ^ b; 24919baf839SRobert Olsson int i = offset; 25019baf839SRobert Olsson 25119baf839SRobert Olsson if (!diff) 25219baf839SRobert Olsson return 0; 25319baf839SRobert Olsson while ((diff << i) >> (KEYLENGTH-1) == 0) 25419baf839SRobert Olsson i++; 25519baf839SRobert Olsson return i; 25619baf839SRobert Olsson } 25719baf839SRobert Olsson 25819baf839SRobert Olsson /* 25919baf839SRobert Olsson To understand this stuff, an understanding of keys and all their bits is 26019baf839SRobert Olsson necessary. Every node in the trie has a key associated with it, but not 26119baf839SRobert Olsson all of the bits in that key are significant. 26219baf839SRobert Olsson 26319baf839SRobert Olsson Consider a node 'n' and its parent 'tp'. 26419baf839SRobert Olsson 26519baf839SRobert Olsson If n is a leaf, every bit in its key is significant. Its presence is 266772cb712SRobert Olsson necessitated by path compression, since during a tree traversal (when 26719baf839SRobert Olsson searching for a leaf - unless we are doing an insertion) we will completely 26819baf839SRobert Olsson ignore all skipped bits we encounter. Thus we need to verify, at the end of 26919baf839SRobert Olsson a potentially successful search, that we have indeed been walking the 27019baf839SRobert Olsson correct key path. 27119baf839SRobert Olsson 27219baf839SRobert Olsson Note that we can never "miss" the correct key in the tree if present by 27319baf839SRobert Olsson following the wrong path. Path compression ensures that segments of the key 27419baf839SRobert Olsson that are the same for all keys with a given prefix are skipped, but the 27519baf839SRobert Olsson skipped part *is* identical for each node in the subtrie below the skipped 27619baf839SRobert Olsson bit! trie_insert() in this implementation takes care of that - note the 27719baf839SRobert Olsson call to tkey_sub_equals() in trie_insert(). 27819baf839SRobert Olsson 27919baf839SRobert Olsson if n is an internal node - a 'tnode' here, the various parts of its key 28019baf839SRobert Olsson have many different meanings. 28119baf839SRobert Olsson 28219baf839SRobert Olsson Example: 28319baf839SRobert Olsson _________________________________________________________________ 28419baf839SRobert Olsson | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 28519baf839SRobert Olsson ----------------------------------------------------------------- 28619baf839SRobert Olsson 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 28719baf839SRobert Olsson 28819baf839SRobert Olsson _________________________________________________________________ 28919baf839SRobert Olsson | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 29019baf839SRobert Olsson ----------------------------------------------------------------- 29119baf839SRobert Olsson 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 29219baf839SRobert Olsson 29319baf839SRobert Olsson tp->pos = 7 29419baf839SRobert Olsson tp->bits = 3 29519baf839SRobert Olsson n->pos = 15 29619baf839SRobert Olsson n->bits = 4 29719baf839SRobert Olsson 29819baf839SRobert Olsson First, let's just ignore the bits that come before the parent tp, that is 29919baf839SRobert Olsson the bits from 0 to (tp->pos-1). They are *known* but at this point we do 30019baf839SRobert Olsson not use them for anything. 30119baf839SRobert Olsson 30219baf839SRobert Olsson The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 30319baf839SRobert Olsson index into the parent's child array. That is, they will be used to find 30419baf839SRobert Olsson 'n' among tp's children. 30519baf839SRobert Olsson 30619baf839SRobert Olsson The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 30719baf839SRobert Olsson for the node n. 30819baf839SRobert Olsson 30919baf839SRobert Olsson All the bits we have seen so far are significant to the node n. The rest 31019baf839SRobert Olsson of the bits are really not needed or indeed known in n->key. 31119baf839SRobert Olsson 31219baf839SRobert Olsson The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 31319baf839SRobert Olsson n's child array, and will of course be different for each child. 31419baf839SRobert Olsson 315c877efb2SStephen Hemminger 31619baf839SRobert Olsson The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 31719baf839SRobert Olsson at this point. 31819baf839SRobert Olsson 31919baf839SRobert Olsson */ 32019baf839SRobert Olsson 3210c7770c7SStephen Hemminger static inline void check_tnode(const struct tnode *tn) 32219baf839SRobert Olsson { 3230c7770c7SStephen Hemminger WARN_ON(tn && tn->pos+tn->bits > 32); 32419baf839SRobert Olsson } 32519baf839SRobert Olsson 326f5026fabSDenis V. Lunev static const int halve_threshold = 25; 327f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 328345aa031SJarek Poplawski static const int halve_threshold_root = 15; 32980b71b80SJens Låås static const int inflate_threshold_root = 30; 3302373ce1cSRobert Olsson 3312373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3322373ce1cSRobert Olsson { 3332373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3342373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3352373ce1cSRobert Olsson } 3362373ce1cSRobert Olsson 3372373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3382373ce1cSRobert Olsson { 3392373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3402373ce1cSRobert Olsson } 3412373ce1cSRobert Olsson 3422373ce1cSRobert Olsson static void __leaf_free_rcu(struct rcu_head *head) 3432373ce1cSRobert Olsson { 344bc3c8c1eSStephen Hemminger struct leaf *l = container_of(head, struct leaf, rcu); 345bc3c8c1eSStephen Hemminger kmem_cache_free(trie_leaf_kmem, l); 3462373ce1cSRobert Olsson } 3472373ce1cSRobert Olsson 348387a5487SStephen Hemminger static inline void free_leaf(struct leaf *l) 349387a5487SStephen Hemminger { 350387a5487SStephen Hemminger call_rcu_bh(&l->rcu, __leaf_free_rcu); 351387a5487SStephen Hemminger } 352387a5487SStephen Hemminger 3532373ce1cSRobert Olsson static void __leaf_info_free_rcu(struct rcu_head *head) 3542373ce1cSRobert Olsson { 3552373ce1cSRobert Olsson kfree(container_of(head, struct leaf_info, rcu)); 3562373ce1cSRobert Olsson } 3572373ce1cSRobert Olsson 3582373ce1cSRobert Olsson static inline void free_leaf_info(struct leaf_info *leaf) 3592373ce1cSRobert Olsson { 3602373ce1cSRobert Olsson call_rcu(&leaf->rcu, __leaf_info_free_rcu); 3612373ce1cSRobert Olsson } 3622373ce1cSRobert Olsson 3638d965444SEric Dumazet static struct tnode *tnode_alloc(size_t size) 3642373ce1cSRobert Olsson { 3652373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3668d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 36715be75cdSStephen Hemminger else 3687a1c8e5aSEric Dumazet return vzalloc(size); 36915be75cdSStephen Hemminger } 3702373ce1cSRobert Olsson 37115be75cdSStephen Hemminger static void __tnode_vfree(struct work_struct *arg) 37215be75cdSStephen Hemminger { 37315be75cdSStephen Hemminger struct tnode *tn = container_of(arg, struct tnode, work); 37415be75cdSStephen Hemminger vfree(tn); 3752373ce1cSRobert Olsson } 3762373ce1cSRobert Olsson 3772373ce1cSRobert Olsson static void __tnode_free_rcu(struct rcu_head *head) 3782373ce1cSRobert Olsson { 3792373ce1cSRobert Olsson struct tnode *tn = container_of(head, struct tnode, rcu); 3808d965444SEric Dumazet size_t size = sizeof(struct tnode) + 381b299e4f0SDavid S. Miller (sizeof(struct rt_trie_node *) << tn->bits); 3822373ce1cSRobert Olsson 3832373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3842373ce1cSRobert Olsson kfree(tn); 38515be75cdSStephen Hemminger else { 38615be75cdSStephen Hemminger INIT_WORK(&tn->work, __tnode_vfree); 38715be75cdSStephen Hemminger schedule_work(&tn->work); 38815be75cdSStephen Hemminger } 3892373ce1cSRobert Olsson } 3902373ce1cSRobert Olsson 3912373ce1cSRobert Olsson static inline void tnode_free(struct tnode *tn) 3922373ce1cSRobert Olsson { 393387a5487SStephen Hemminger if (IS_LEAF(tn)) 394387a5487SStephen Hemminger free_leaf((struct leaf *) tn); 395387a5487SStephen Hemminger else 3962373ce1cSRobert Olsson call_rcu(&tn->rcu, __tnode_free_rcu); 3972373ce1cSRobert Olsson } 3982373ce1cSRobert Olsson 399e0f7cb8cSJarek Poplawski static void tnode_free_safe(struct tnode *tn) 400e0f7cb8cSJarek Poplawski { 401e0f7cb8cSJarek Poplawski BUG_ON(IS_LEAF(tn)); 402e0f7cb8cSJarek Poplawski tn->tnode_free = tnode_free_head; 403e0f7cb8cSJarek Poplawski tnode_free_head = tn; 404c3059477SJarek Poplawski tnode_free_size += sizeof(struct tnode) + 405b299e4f0SDavid S. Miller (sizeof(struct rt_trie_node *) << tn->bits); 406e0f7cb8cSJarek Poplawski } 407e0f7cb8cSJarek Poplawski 408e0f7cb8cSJarek Poplawski static void tnode_free_flush(void) 409e0f7cb8cSJarek Poplawski { 410e0f7cb8cSJarek Poplawski struct tnode *tn; 411e0f7cb8cSJarek Poplawski 412e0f7cb8cSJarek Poplawski while ((tn = tnode_free_head)) { 413e0f7cb8cSJarek Poplawski tnode_free_head = tn->tnode_free; 414e0f7cb8cSJarek Poplawski tn->tnode_free = NULL; 415e0f7cb8cSJarek Poplawski tnode_free(tn); 416e0f7cb8cSJarek Poplawski } 417c3059477SJarek Poplawski 418c3059477SJarek Poplawski if (tnode_free_size >= PAGE_SIZE * sync_pages) { 419c3059477SJarek Poplawski tnode_free_size = 0; 420c3059477SJarek Poplawski synchronize_rcu(); 421c3059477SJarek Poplawski } 422e0f7cb8cSJarek Poplawski } 423e0f7cb8cSJarek Poplawski 42419baf839SRobert Olsson static struct leaf *leaf_new(void) 42519baf839SRobert Olsson { 426bc3c8c1eSStephen Hemminger struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 42719baf839SRobert Olsson if (l) { 4282373ce1cSRobert Olsson l->parent = T_LEAF; 42919baf839SRobert Olsson INIT_HLIST_HEAD(&l->list); 43019baf839SRobert Olsson } 43119baf839SRobert Olsson return l; 43219baf839SRobert Olsson } 43319baf839SRobert Olsson 43419baf839SRobert Olsson static struct leaf_info *leaf_info_new(int plen) 43519baf839SRobert Olsson { 43619baf839SRobert Olsson struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 4372373ce1cSRobert Olsson if (li) { 43819baf839SRobert Olsson li->plen = plen; 43919baf839SRobert Olsson INIT_LIST_HEAD(&li->falh); 4402373ce1cSRobert Olsson } 44119baf839SRobert Olsson return li; 44219baf839SRobert Olsson } 44319baf839SRobert Olsson 44419baf839SRobert Olsson static struct tnode *tnode_new(t_key key, int pos, int bits) 44519baf839SRobert Olsson { 446b299e4f0SDavid S. Miller size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); 447f0e36f8cSPatrick McHardy struct tnode *tn = tnode_alloc(sz); 44819baf839SRobert Olsson 44919baf839SRobert Olsson if (tn) { 4502373ce1cSRobert Olsson tn->parent = T_TNODE; 45119baf839SRobert Olsson tn->pos = pos; 45219baf839SRobert Olsson tn->bits = bits; 45319baf839SRobert Olsson tn->key = key; 45419baf839SRobert Olsson tn->full_children = 0; 45519baf839SRobert Olsson tn->empty_children = 1<<bits; 45619baf839SRobert Olsson } 457c877efb2SStephen Hemminger 458a034ee3cSEric Dumazet pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 459b299e4f0SDavid S. Miller sizeof(struct rt_trie_node) << bits); 46019baf839SRobert Olsson return tn; 46119baf839SRobert Olsson } 46219baf839SRobert Olsson 46319baf839SRobert Olsson /* 46419baf839SRobert Olsson * Check whether a tnode 'n' is "full", i.e. it is an internal node 46519baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 46619baf839SRobert Olsson */ 46719baf839SRobert Olsson 468b299e4f0SDavid S. Miller static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) 46919baf839SRobert Olsson { 47019baf839SRobert Olsson if (n == NULL || IS_LEAF(n)) 47119baf839SRobert Olsson return 0; 47219baf839SRobert Olsson 47319baf839SRobert Olsson return ((struct tnode *) n)->pos == tn->pos + tn->bits; 47419baf839SRobert Olsson } 47519baf839SRobert Olsson 476a07f5f50SStephen Hemminger static inline void put_child(struct trie *t, struct tnode *tn, int i, 477b299e4f0SDavid S. Miller struct rt_trie_node *n) 47819baf839SRobert Olsson { 47919baf839SRobert Olsson tnode_put_child_reorg(tn, i, n, -1); 48019baf839SRobert Olsson } 48119baf839SRobert Olsson 48219baf839SRobert Olsson /* 48319baf839SRobert Olsson * Add a child at position i overwriting the old value. 48419baf839SRobert Olsson * Update the value of full_children and empty_children. 48519baf839SRobert Olsson */ 48619baf839SRobert Olsson 487b299e4f0SDavid S. Miller static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 488a07f5f50SStephen Hemminger int wasfull) 48919baf839SRobert Olsson { 490b299e4f0SDavid S. Miller struct rt_trie_node *chi = tn->child[i]; 49119baf839SRobert Olsson int isfull; 49219baf839SRobert Olsson 4930c7770c7SStephen Hemminger BUG_ON(i >= 1<<tn->bits); 4940c7770c7SStephen Hemminger 49519baf839SRobert Olsson /* update emptyChildren */ 49619baf839SRobert Olsson if (n == NULL && chi != NULL) 49719baf839SRobert Olsson tn->empty_children++; 49819baf839SRobert Olsson else if (n != NULL && chi == NULL) 49919baf839SRobert Olsson tn->empty_children--; 50019baf839SRobert Olsson 50119baf839SRobert Olsson /* update fullChildren */ 50219baf839SRobert Olsson if (wasfull == -1) 50319baf839SRobert Olsson wasfull = tnode_full(tn, chi); 50419baf839SRobert Olsson 50519baf839SRobert Olsson isfull = tnode_full(tn, n); 50619baf839SRobert Olsson if (wasfull && !isfull) 50719baf839SRobert Olsson tn->full_children--; 50819baf839SRobert Olsson else if (!wasfull && isfull) 50919baf839SRobert Olsson tn->full_children++; 51091b9a277SOlof Johansson 51119baf839SRobert Olsson if (n) 51206801916SStephen Hemminger node_set_parent(n, tn); 51319baf839SRobert Olsson 5142373ce1cSRobert Olsson rcu_assign_pointer(tn->child[i], n); 51519baf839SRobert Olsson } 51619baf839SRobert Olsson 51780b71b80SJens Låås #define MAX_WORK 10 518b299e4f0SDavid S. Miller static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) 51919baf839SRobert Olsson { 52019baf839SRobert Olsson int i; 5212f80b3c8SRobert Olsson struct tnode *old_tn; 522e6308be8SRobert Olsson int inflate_threshold_use; 523e6308be8SRobert Olsson int halve_threshold_use; 52480b71b80SJens Låås int max_work; 52519baf839SRobert Olsson 52619baf839SRobert Olsson if (!tn) 52719baf839SRobert Olsson return NULL; 52819baf839SRobert Olsson 5290c7770c7SStephen Hemminger pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 53019baf839SRobert Olsson tn, inflate_threshold, halve_threshold); 53119baf839SRobert Olsson 53219baf839SRobert Olsson /* No children */ 53319baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn)) { 534e0f7cb8cSJarek Poplawski tnode_free_safe(tn); 53519baf839SRobert Olsson return NULL; 53619baf839SRobert Olsson } 53719baf839SRobert Olsson /* One child */ 53819baf839SRobert Olsson if (tn->empty_children == tnode_child_length(tn) - 1) 53980b71b80SJens Låås goto one_child; 54019baf839SRobert Olsson /* 54119baf839SRobert Olsson * Double as long as the resulting node has a number of 54219baf839SRobert Olsson * nonempty nodes that are above the threshold. 54319baf839SRobert Olsson */ 54419baf839SRobert Olsson 54519baf839SRobert Olsson /* 54619baf839SRobert Olsson * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 54719baf839SRobert Olsson * the Helsinki University of Technology and Matti Tikkanen of Nokia 54819baf839SRobert Olsson * Telecommunications, page 6: 54919baf839SRobert Olsson * "A node is doubled if the ratio of non-empty children to all 55019baf839SRobert Olsson * children in the *doubled* node is at least 'high'." 55119baf839SRobert Olsson * 55219baf839SRobert Olsson * 'high' in this instance is the variable 'inflate_threshold'. It 55319baf839SRobert Olsson * is expressed as a percentage, so we multiply it with 55419baf839SRobert Olsson * tnode_child_length() and instead of multiplying by 2 (since the 55519baf839SRobert Olsson * child array will be doubled by inflate()) and multiplying 55619baf839SRobert Olsson * the left-hand side by 100 (to handle the percentage thing) we 55719baf839SRobert Olsson * multiply the left-hand side by 50. 55819baf839SRobert Olsson * 55919baf839SRobert Olsson * The left-hand side may look a bit weird: tnode_child_length(tn) 56019baf839SRobert Olsson * - tn->empty_children is of course the number of non-null children 56119baf839SRobert Olsson * in the current node. tn->full_children is the number of "full" 56219baf839SRobert Olsson * children, that is non-null tnodes with a skip value of 0. 56319baf839SRobert Olsson * All of those will be doubled in the resulting inflated tnode, so 56419baf839SRobert Olsson * we just count them one extra time here. 56519baf839SRobert Olsson * 56619baf839SRobert Olsson * A clearer way to write this would be: 56719baf839SRobert Olsson * 56819baf839SRobert Olsson * to_be_doubled = tn->full_children; 56919baf839SRobert Olsson * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 57019baf839SRobert Olsson * tn->full_children; 57119baf839SRobert Olsson * 57219baf839SRobert Olsson * new_child_length = tnode_child_length(tn) * 2; 57319baf839SRobert Olsson * 57419baf839SRobert Olsson * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 57519baf839SRobert Olsson * new_child_length; 57619baf839SRobert Olsson * if (new_fill_factor >= inflate_threshold) 57719baf839SRobert Olsson * 57819baf839SRobert Olsson * ...and so on, tho it would mess up the while () loop. 57919baf839SRobert Olsson * 58019baf839SRobert Olsson * anyway, 58119baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 58219baf839SRobert Olsson * inflate_threshold 58319baf839SRobert Olsson * 58419baf839SRobert Olsson * avoid a division: 58519baf839SRobert Olsson * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 58619baf839SRobert Olsson * inflate_threshold * new_child_length 58719baf839SRobert Olsson * 58819baf839SRobert Olsson * expand not_to_be_doubled and to_be_doubled, and shorten: 58919baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 59019baf839SRobert Olsson * tn->full_children) >= inflate_threshold * new_child_length 59119baf839SRobert Olsson * 59219baf839SRobert Olsson * expand new_child_length: 59319baf839SRobert Olsson * 100 * (tnode_child_length(tn) - tn->empty_children + 59419baf839SRobert Olsson * tn->full_children) >= 59519baf839SRobert Olsson * inflate_threshold * tnode_child_length(tn) * 2 59619baf839SRobert Olsson * 59719baf839SRobert Olsson * shorten again: 59819baf839SRobert Olsson * 50 * (tn->full_children + tnode_child_length(tn) - 59919baf839SRobert Olsson * tn->empty_children) >= inflate_threshold * 60019baf839SRobert Olsson * tnode_child_length(tn) 60119baf839SRobert Olsson * 60219baf839SRobert Olsson */ 60319baf839SRobert Olsson 60419baf839SRobert Olsson check_tnode(tn); 60519baf839SRobert Olsson 606e6308be8SRobert Olsson /* Keep root node larger */ 607e6308be8SRobert Olsson 608b299e4f0SDavid S. Miller if (!node_parent((struct rt_trie_node *)tn)) { 60980b71b80SJens Låås inflate_threshold_use = inflate_threshold_root; 61080b71b80SJens Låås halve_threshold_use = halve_threshold_root; 611a034ee3cSEric Dumazet } else { 612e6308be8SRobert Olsson inflate_threshold_use = inflate_threshold; 61380b71b80SJens Låås halve_threshold_use = halve_threshold; 61480b71b80SJens Låås } 615e6308be8SRobert Olsson 61680b71b80SJens Låås max_work = MAX_WORK; 61780b71b80SJens Låås while ((tn->full_children > 0 && max_work-- && 618a07f5f50SStephen Hemminger 50 * (tn->full_children + tnode_child_length(tn) 619a07f5f50SStephen Hemminger - tn->empty_children) 620a07f5f50SStephen Hemminger >= inflate_threshold_use * tnode_child_length(tn))) { 62119baf839SRobert Olsson 6222f80b3c8SRobert Olsson old_tn = tn; 6232f80b3c8SRobert Olsson tn = inflate(t, tn); 624a07f5f50SStephen Hemminger 6252f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6262f80b3c8SRobert Olsson tn = old_tn; 6272f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6282f36895aSRobert Olsson t->stats.resize_node_skipped++; 6292f36895aSRobert Olsson #endif 6302f36895aSRobert Olsson break; 6312f36895aSRobert Olsson } 63219baf839SRobert Olsson } 63319baf839SRobert Olsson 63419baf839SRobert Olsson check_tnode(tn); 63519baf839SRobert Olsson 63680b71b80SJens Låås /* Return if at least one inflate is run */ 63780b71b80SJens Låås if (max_work != MAX_WORK) 638b299e4f0SDavid S. Miller return (struct rt_trie_node *) tn; 63980b71b80SJens Låås 64019baf839SRobert Olsson /* 64119baf839SRobert Olsson * Halve as long as the number of empty children in this 64219baf839SRobert Olsson * node is above threshold. 64319baf839SRobert Olsson */ 6442f36895aSRobert Olsson 64580b71b80SJens Låås max_work = MAX_WORK; 64680b71b80SJens Låås while (tn->bits > 1 && max_work-- && 64719baf839SRobert Olsson 100 * (tnode_child_length(tn) - tn->empty_children) < 648e6308be8SRobert Olsson halve_threshold_use * tnode_child_length(tn)) { 64919baf839SRobert Olsson 6502f80b3c8SRobert Olsson old_tn = tn; 6512f80b3c8SRobert Olsson tn = halve(t, tn); 6522f80b3c8SRobert Olsson if (IS_ERR(tn)) { 6532f80b3c8SRobert Olsson tn = old_tn; 6542f36895aSRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 6552f36895aSRobert Olsson t->stats.resize_node_skipped++; 6562f36895aSRobert Olsson #endif 6572f36895aSRobert Olsson break; 6582f36895aSRobert Olsson } 6592f36895aSRobert Olsson } 6602f36895aSRobert Olsson 66119baf839SRobert Olsson 66219baf839SRobert Olsson /* Only one child remains */ 66380b71b80SJens Låås if (tn->empty_children == tnode_child_length(tn) - 1) { 66480b71b80SJens Låås one_child: 66519baf839SRobert Olsson for (i = 0; i < tnode_child_length(tn); i++) { 666b299e4f0SDavid S. Miller struct rt_trie_node *n; 66719baf839SRobert Olsson 66891b9a277SOlof Johansson n = tn->child[i]; 6692373ce1cSRobert Olsson if (!n) 67091b9a277SOlof Johansson continue; 67191b9a277SOlof Johansson 67291b9a277SOlof Johansson /* compress one level */ 67391b9a277SOlof Johansson 67406801916SStephen Hemminger node_set_parent(n, NULL); 675e0f7cb8cSJarek Poplawski tnode_free_safe(tn); 67619baf839SRobert Olsson return n; 67719baf839SRobert Olsson } 67880b71b80SJens Låås } 679b299e4f0SDavid S. Miller return (struct rt_trie_node *) tn; 68019baf839SRobert Olsson } 68119baf839SRobert Olsson 6822f80b3c8SRobert Olsson static struct tnode *inflate(struct trie *t, struct tnode *tn) 68319baf839SRobert Olsson { 68419baf839SRobert Olsson struct tnode *oldtnode = tn; 68519baf839SRobert Olsson int olen = tnode_child_length(tn); 68619baf839SRobert Olsson int i; 68719baf839SRobert Olsson 6880c7770c7SStephen Hemminger pr_debug("In inflate\n"); 68919baf839SRobert Olsson 69019baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 69119baf839SRobert Olsson 6922f80b3c8SRobert Olsson if (!tn) 6932f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 6942f36895aSRobert Olsson 6952f36895aSRobert Olsson /* 6962f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 6972f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 6982f36895aSRobert Olsson * fails. In case of failure we return the oldnode and inflate 6992f36895aSRobert Olsson * of tnode is ignored. 7002f36895aSRobert Olsson */ 7012f36895aSRobert Olsson 7022f36895aSRobert Olsson for (i = 0; i < olen; i++) { 703a07f5f50SStephen Hemminger struct tnode *inode; 7042f36895aSRobert Olsson 705a07f5f50SStephen Hemminger inode = (struct tnode *) tnode_get_child(oldtnode, i); 7062f36895aSRobert Olsson if (inode && 7072f36895aSRobert Olsson IS_TNODE(inode) && 7082f36895aSRobert Olsson inode->pos == oldtnode->pos + oldtnode->bits && 7092f36895aSRobert Olsson inode->bits > 1) { 7102f36895aSRobert Olsson struct tnode *left, *right; 711ab66b4a7SStephen Hemminger t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; 7122f36895aSRobert Olsson 7132f36895aSRobert Olsson left = tnode_new(inode->key&(~m), inode->pos + 1, 7142f36895aSRobert Olsson inode->bits - 1); 7152f80b3c8SRobert Olsson if (!left) 7162f80b3c8SRobert Olsson goto nomem; 7172f36895aSRobert Olsson 7182f36895aSRobert Olsson right = tnode_new(inode->key|m, inode->pos + 1, 7192f36895aSRobert Olsson inode->bits - 1); 7202f36895aSRobert Olsson 7212f36895aSRobert Olsson if (!right) { 7222f80b3c8SRobert Olsson tnode_free(left); 7232f80b3c8SRobert Olsson goto nomem; 7242f36895aSRobert Olsson } 7252f36895aSRobert Olsson 726b299e4f0SDavid S. Miller put_child(t, tn, 2*i, (struct rt_trie_node *) left); 727b299e4f0SDavid S. Miller put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); 7282f36895aSRobert Olsson } 7292f36895aSRobert Olsson } 7302f36895aSRobert Olsson 73119baf839SRobert Olsson for (i = 0; i < olen; i++) { 732c95aaf9aSStephen Hemminger struct tnode *inode; 733b299e4f0SDavid S. Miller struct rt_trie_node *node = tnode_get_child(oldtnode, i); 73491b9a277SOlof Johansson struct tnode *left, *right; 73591b9a277SOlof Johansson int size, j; 73619baf839SRobert Olsson 73719baf839SRobert Olsson /* An empty child */ 73819baf839SRobert Olsson if (node == NULL) 73919baf839SRobert Olsson continue; 74019baf839SRobert Olsson 74119baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 74219baf839SRobert Olsson 74319baf839SRobert Olsson if (IS_LEAF(node) || ((struct tnode *) node)->pos > 74419baf839SRobert Olsson tn->pos + tn->bits - 1) { 745a07f5f50SStephen Hemminger if (tkey_extract_bits(node->key, 746a07f5f50SStephen Hemminger oldtnode->pos + oldtnode->bits, 74719baf839SRobert Olsson 1) == 0) 74819baf839SRobert Olsson put_child(t, tn, 2*i, node); 74919baf839SRobert Olsson else 75019baf839SRobert Olsson put_child(t, tn, 2*i+1, node); 75119baf839SRobert Olsson continue; 75219baf839SRobert Olsson } 75319baf839SRobert Olsson 75419baf839SRobert Olsson /* An internal node with two children */ 75519baf839SRobert Olsson inode = (struct tnode *) node; 75619baf839SRobert Olsson 75719baf839SRobert Olsson if (inode->bits == 1) { 75819baf839SRobert Olsson put_child(t, tn, 2*i, inode->child[0]); 75919baf839SRobert Olsson put_child(t, tn, 2*i+1, inode->child[1]); 76019baf839SRobert Olsson 761e0f7cb8cSJarek Poplawski tnode_free_safe(inode); 76291b9a277SOlof Johansson continue; 76319baf839SRobert Olsson } 76419baf839SRobert Olsson 76519baf839SRobert Olsson /* An internal node with more than two children */ 76619baf839SRobert Olsson 76719baf839SRobert Olsson /* We will replace this node 'inode' with two new 76819baf839SRobert Olsson * ones, 'left' and 'right', each with half of the 76919baf839SRobert Olsson * original children. The two new nodes will have 77019baf839SRobert Olsson * a position one bit further down the key and this 77119baf839SRobert Olsson * means that the "significant" part of their keys 77219baf839SRobert Olsson * (see the discussion near the top of this file) 77319baf839SRobert Olsson * will differ by one bit, which will be "0" in 77419baf839SRobert Olsson * left's key and "1" in right's key. Since we are 77519baf839SRobert Olsson * moving the key position by one step, the bit that 77619baf839SRobert Olsson * we are moving away from - the bit at position 77719baf839SRobert Olsson * (inode->pos) - is the one that will differ between 77819baf839SRobert Olsson * left and right. So... we synthesize that bit in the 77919baf839SRobert Olsson * two new keys. 78019baf839SRobert Olsson * The mask 'm' below will be a single "one" bit at 78119baf839SRobert Olsson * the position (inode->pos) 78219baf839SRobert Olsson */ 78319baf839SRobert Olsson 78419baf839SRobert Olsson /* Use the old key, but set the new significant 78519baf839SRobert Olsson * bit to zero. 78619baf839SRobert Olsson */ 7872f36895aSRobert Olsson 7882f36895aSRobert Olsson left = (struct tnode *) tnode_get_child(tn, 2*i); 7892f36895aSRobert Olsson put_child(t, tn, 2*i, NULL); 79019baf839SRobert Olsson 79191b9a277SOlof Johansson BUG_ON(!left); 79219baf839SRobert Olsson 7932f36895aSRobert Olsson right = (struct tnode *) tnode_get_child(tn, 2*i+1); 7942f36895aSRobert Olsson put_child(t, tn, 2*i+1, NULL); 79519baf839SRobert Olsson 79691b9a277SOlof Johansson BUG_ON(!right); 79719baf839SRobert Olsson 79819baf839SRobert Olsson size = tnode_child_length(left); 79919baf839SRobert Olsson for (j = 0; j < size; j++) { 80019baf839SRobert Olsson put_child(t, left, j, inode->child[j]); 80119baf839SRobert Olsson put_child(t, right, j, inode->child[j + size]); 80219baf839SRobert Olsson } 80319baf839SRobert Olsson put_child(t, tn, 2*i, resize(t, left)); 80419baf839SRobert Olsson put_child(t, tn, 2*i+1, resize(t, right)); 80519baf839SRobert Olsson 806e0f7cb8cSJarek Poplawski tnode_free_safe(inode); 80719baf839SRobert Olsson } 808e0f7cb8cSJarek Poplawski tnode_free_safe(oldtnode); 80919baf839SRobert Olsson return tn; 8102f80b3c8SRobert Olsson nomem: 8112f80b3c8SRobert Olsson { 8122f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8132f80b3c8SRobert Olsson int j; 8142f80b3c8SRobert Olsson 8152f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8162f80b3c8SRobert Olsson if (tn->child[j]) 8172f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 8182f80b3c8SRobert Olsson 8192f80b3c8SRobert Olsson tnode_free(tn); 8202f80b3c8SRobert Olsson 8212f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8222f80b3c8SRobert Olsson } 82319baf839SRobert Olsson } 82419baf839SRobert Olsson 8252f80b3c8SRobert Olsson static struct tnode *halve(struct trie *t, struct tnode *tn) 82619baf839SRobert Olsson { 82719baf839SRobert Olsson struct tnode *oldtnode = tn; 828b299e4f0SDavid S. Miller struct rt_trie_node *left, *right; 82919baf839SRobert Olsson int i; 83019baf839SRobert Olsson int olen = tnode_child_length(tn); 83119baf839SRobert Olsson 8320c7770c7SStephen Hemminger pr_debug("In halve\n"); 83319baf839SRobert Olsson 83419baf839SRobert Olsson tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 83519baf839SRobert Olsson 8362f80b3c8SRobert Olsson if (!tn) 8372f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 8382f36895aSRobert Olsson 8392f36895aSRobert Olsson /* 8402f36895aSRobert Olsson * Preallocate and store tnodes before the actual work so we 8412f36895aSRobert Olsson * don't get into an inconsistent state if memory allocation 8422f36895aSRobert Olsson * fails. In case of failure we return the oldnode and halve 8432f36895aSRobert Olsson * of tnode is ignored. 8442f36895aSRobert Olsson */ 8452f36895aSRobert Olsson 8462f36895aSRobert Olsson for (i = 0; i < olen; i += 2) { 8472f36895aSRobert Olsson left = tnode_get_child(oldtnode, i); 8482f36895aSRobert Olsson right = tnode_get_child(oldtnode, i+1); 8492f36895aSRobert Olsson 8502f36895aSRobert Olsson /* Two nonempty children */ 8512f36895aSRobert Olsson if (left && right) { 8522f80b3c8SRobert Olsson struct tnode *newn; 8532f36895aSRobert Olsson 8542f80b3c8SRobert Olsson newn = tnode_new(left->key, tn->pos + tn->bits, 1); 8552f80b3c8SRobert Olsson 8562f80b3c8SRobert Olsson if (!newn) 8572f80b3c8SRobert Olsson goto nomem; 8582f80b3c8SRobert Olsson 859b299e4f0SDavid S. Miller put_child(t, tn, i/2, (struct rt_trie_node *)newn); 8602f36895aSRobert Olsson } 8612f36895aSRobert Olsson 8622f36895aSRobert Olsson } 86319baf839SRobert Olsson 86419baf839SRobert Olsson for (i = 0; i < olen; i += 2) { 86591b9a277SOlof Johansson struct tnode *newBinNode; 86691b9a277SOlof Johansson 86719baf839SRobert Olsson left = tnode_get_child(oldtnode, i); 86819baf839SRobert Olsson right = tnode_get_child(oldtnode, i+1); 86919baf839SRobert Olsson 87019baf839SRobert Olsson /* At least one of the children is empty */ 87119baf839SRobert Olsson if (left == NULL) { 87219baf839SRobert Olsson if (right == NULL) /* Both are empty */ 87319baf839SRobert Olsson continue; 87419baf839SRobert Olsson put_child(t, tn, i/2, right); 87591b9a277SOlof Johansson continue; 87691b9a277SOlof Johansson } 87791b9a277SOlof Johansson 87891b9a277SOlof Johansson if (right == NULL) { 87919baf839SRobert Olsson put_child(t, tn, i/2, left); 88091b9a277SOlof Johansson continue; 88191b9a277SOlof Johansson } 88219baf839SRobert Olsson 88319baf839SRobert Olsson /* Two nonempty children */ 88491b9a277SOlof Johansson newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 8852f36895aSRobert Olsson put_child(t, tn, i/2, NULL); 88619baf839SRobert Olsson put_child(t, newBinNode, 0, left); 88719baf839SRobert Olsson put_child(t, newBinNode, 1, right); 88819baf839SRobert Olsson put_child(t, tn, i/2, resize(t, newBinNode)); 88919baf839SRobert Olsson } 890e0f7cb8cSJarek Poplawski tnode_free_safe(oldtnode); 89119baf839SRobert Olsson return tn; 8922f80b3c8SRobert Olsson nomem: 8932f80b3c8SRobert Olsson { 8942f80b3c8SRobert Olsson int size = tnode_child_length(tn); 8952f80b3c8SRobert Olsson int j; 8962f80b3c8SRobert Olsson 8972f80b3c8SRobert Olsson for (j = 0; j < size; j++) 8982f80b3c8SRobert Olsson if (tn->child[j]) 8992f80b3c8SRobert Olsson tnode_free((struct tnode *)tn->child[j]); 9002f80b3c8SRobert Olsson 9012f80b3c8SRobert Olsson tnode_free(tn); 9022f80b3c8SRobert Olsson 9032f80b3c8SRobert Olsson return ERR_PTR(-ENOMEM); 9042f80b3c8SRobert Olsson } 90519baf839SRobert Olsson } 90619baf839SRobert Olsson 907772cb712SRobert Olsson /* readside must use rcu_read_lock currently dump routines 9082373ce1cSRobert Olsson via get_fa_head and dump */ 9092373ce1cSRobert Olsson 910772cb712SRobert Olsson static struct leaf_info *find_leaf_info(struct leaf *l, int plen) 91119baf839SRobert Olsson { 912772cb712SRobert Olsson struct hlist_head *head = &l->list; 91319baf839SRobert Olsson struct hlist_node *node; 91419baf839SRobert Olsson struct leaf_info *li; 91519baf839SRobert Olsson 9162373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, head, hlist) 91719baf839SRobert Olsson if (li->plen == plen) 91819baf839SRobert Olsson return li; 91991b9a277SOlof Johansson 92019baf839SRobert Olsson return NULL; 92119baf839SRobert Olsson } 92219baf839SRobert Olsson 92319baf839SRobert Olsson static inline struct list_head *get_fa_head(struct leaf *l, int plen) 92419baf839SRobert Olsson { 925772cb712SRobert Olsson struct leaf_info *li = find_leaf_info(l, plen); 92619baf839SRobert Olsson 92791b9a277SOlof Johansson if (!li) 92891b9a277SOlof Johansson return NULL; 92919baf839SRobert Olsson 93091b9a277SOlof Johansson return &li->falh; 93119baf839SRobert Olsson } 93219baf839SRobert Olsson 93319baf839SRobert Olsson static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 93419baf839SRobert Olsson { 93519baf839SRobert Olsson struct leaf_info *li = NULL, *last = NULL; 93691b9a277SOlof Johansson struct hlist_node *node; 93719baf839SRobert Olsson 93891b9a277SOlof Johansson if (hlist_empty(head)) { 9392373ce1cSRobert Olsson hlist_add_head_rcu(&new->hlist, head); 94091b9a277SOlof Johansson } else { 94191b9a277SOlof Johansson hlist_for_each_entry(li, node, head, hlist) { 94219baf839SRobert Olsson if (new->plen > li->plen) 94319baf839SRobert Olsson break; 94419baf839SRobert Olsson 94519baf839SRobert Olsson last = li; 94619baf839SRobert Olsson } 94719baf839SRobert Olsson if (last) 9482373ce1cSRobert Olsson hlist_add_after_rcu(&last->hlist, &new->hlist); 94919baf839SRobert Olsson else 9502373ce1cSRobert Olsson hlist_add_before_rcu(&new->hlist, &li->hlist); 95119baf839SRobert Olsson } 95219baf839SRobert Olsson } 95319baf839SRobert Olsson 9542373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 9552373ce1cSRobert Olsson 95619baf839SRobert Olsson static struct leaf * 95719baf839SRobert Olsson fib_find_node(struct trie *t, u32 key) 95819baf839SRobert Olsson { 95919baf839SRobert Olsson int pos; 96019baf839SRobert Olsson struct tnode *tn; 961b299e4f0SDavid S. Miller struct rt_trie_node *n; 96219baf839SRobert Olsson 96319baf839SRobert Olsson pos = 0; 964a034ee3cSEric Dumazet n = rcu_dereference_rtnl(t->trie); 96519baf839SRobert Olsson 96619baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 96719baf839SRobert Olsson tn = (struct tnode *) n; 96819baf839SRobert Olsson 96919baf839SRobert Olsson check_tnode(tn); 97019baf839SRobert Olsson 97119baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 97219baf839SRobert Olsson pos = tn->pos + tn->bits; 973a07f5f50SStephen Hemminger n = tnode_get_child_rcu(tn, 974a07f5f50SStephen Hemminger tkey_extract_bits(key, 975a07f5f50SStephen Hemminger tn->pos, 976a07f5f50SStephen Hemminger tn->bits)); 97791b9a277SOlof Johansson } else 97819baf839SRobert Olsson break; 97919baf839SRobert Olsson } 98019baf839SRobert Olsson /* Case we have found a leaf. Compare prefixes */ 98119baf839SRobert Olsson 98291b9a277SOlof Johansson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) 98391b9a277SOlof Johansson return (struct leaf *)n; 98491b9a277SOlof Johansson 98519baf839SRobert Olsson return NULL; 98619baf839SRobert Olsson } 98719baf839SRobert Olsson 9887b85576dSJarek Poplawski static void trie_rebalance(struct trie *t, struct tnode *tn) 98919baf839SRobert Olsson { 99019baf839SRobert Olsson int wasfull; 9913ed18d76SRobert Olsson t_key cindex, key; 99206801916SStephen Hemminger struct tnode *tp; 99319baf839SRobert Olsson 9943ed18d76SRobert Olsson key = tn->key; 9953ed18d76SRobert Olsson 996b299e4f0SDavid S. Miller while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { 99719baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 99819baf839SRobert Olsson wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 99919baf839SRobert Olsson tn = (struct tnode *) resize(t, (struct tnode *)tn); 1000a07f5f50SStephen Hemminger 1001a07f5f50SStephen Hemminger tnode_put_child_reorg((struct tnode *)tp, cindex, 1002b299e4f0SDavid S. Miller (struct rt_trie_node *)tn, wasfull); 100319baf839SRobert Olsson 1004b299e4f0SDavid S. Miller tp = node_parent((struct rt_trie_node *) tn); 1005008440e3SJarek Poplawski if (!tp) 1006b299e4f0SDavid S. Miller rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1007008440e3SJarek Poplawski 1008e0f7cb8cSJarek Poplawski tnode_free_flush(); 100906801916SStephen Hemminger if (!tp) 101019baf839SRobert Olsson break; 101106801916SStephen Hemminger tn = tp; 101219baf839SRobert Olsson } 101306801916SStephen Hemminger 101419baf839SRobert Olsson /* Handle last (top) tnode */ 10157b85576dSJarek Poplawski if (IS_TNODE(tn)) 101619baf839SRobert Olsson tn = (struct tnode *)resize(t, (struct tnode *)tn); 101719baf839SRobert Olsson 1018b299e4f0SDavid S. Miller rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 10197b85576dSJarek Poplawski tnode_free_flush(); 102019baf839SRobert Olsson } 102119baf839SRobert Olsson 10222373ce1cSRobert Olsson /* only used from updater-side */ 10232373ce1cSRobert Olsson 1024fea86ad8SStephen Hemminger static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 102519baf839SRobert Olsson { 102619baf839SRobert Olsson int pos, newpos; 102719baf839SRobert Olsson struct tnode *tp = NULL, *tn = NULL; 1028b299e4f0SDavid S. Miller struct rt_trie_node *n; 102919baf839SRobert Olsson struct leaf *l; 103019baf839SRobert Olsson int missbit; 103119baf839SRobert Olsson struct list_head *fa_head = NULL; 103219baf839SRobert Olsson struct leaf_info *li; 103319baf839SRobert Olsson t_key cindex; 103419baf839SRobert Olsson 103519baf839SRobert Olsson pos = 0; 103619baf839SRobert Olsson n = t->trie; 103719baf839SRobert Olsson 103819baf839SRobert Olsson /* If we point to NULL, stop. Either the tree is empty and we should 103919baf839SRobert Olsson * just put a new leaf in if, or we have reached an empty child slot, 104019baf839SRobert Olsson * and we should just put our new leaf in that. 104119baf839SRobert Olsson * If we point to a T_TNODE, check if it matches our key. Note that 104219baf839SRobert Olsson * a T_TNODE might be skipping any number of bits - its 'pos' need 104319baf839SRobert Olsson * not be the parent's 'pos'+'bits'! 104419baf839SRobert Olsson * 104519baf839SRobert Olsson * If it does match the current key, get pos/bits from it, extract 104619baf839SRobert Olsson * the index from our key, push the T_TNODE and walk the tree. 104719baf839SRobert Olsson * 104819baf839SRobert Olsson * If it doesn't, we have to replace it with a new T_TNODE. 104919baf839SRobert Olsson * 105019baf839SRobert Olsson * If we point to a T_LEAF, it might or might not have the same key 105119baf839SRobert Olsson * as we do. If it does, just change the value, update the T_LEAF's 105219baf839SRobert Olsson * value, and return it. 105319baf839SRobert Olsson * If it doesn't, we need to replace it with a T_TNODE. 105419baf839SRobert Olsson */ 105519baf839SRobert Olsson 105619baf839SRobert Olsson while (n != NULL && NODE_TYPE(n) == T_TNODE) { 105719baf839SRobert Olsson tn = (struct tnode *) n; 105819baf839SRobert Olsson 105919baf839SRobert Olsson check_tnode(tn); 106019baf839SRobert Olsson 106119baf839SRobert Olsson if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 106219baf839SRobert Olsson tp = tn; 106319baf839SRobert Olsson pos = tn->pos + tn->bits; 1064a07f5f50SStephen Hemminger n = tnode_get_child(tn, 1065a07f5f50SStephen Hemminger tkey_extract_bits(key, 1066a07f5f50SStephen Hemminger tn->pos, 1067a07f5f50SStephen Hemminger tn->bits)); 106819baf839SRobert Olsson 106906801916SStephen Hemminger BUG_ON(n && node_parent(n) != tn); 107091b9a277SOlof Johansson } else 107119baf839SRobert Olsson break; 107219baf839SRobert Olsson } 107319baf839SRobert Olsson 107419baf839SRobert Olsson /* 107519baf839SRobert Olsson * n ----> NULL, LEAF or TNODE 107619baf839SRobert Olsson * 107719baf839SRobert Olsson * tp is n's (parent) ----> NULL or TNODE 107819baf839SRobert Olsson */ 107919baf839SRobert Olsson 108091b9a277SOlof Johansson BUG_ON(tp && IS_LEAF(tp)); 108119baf839SRobert Olsson 108219baf839SRobert Olsson /* Case 1: n is a leaf. Compare prefixes */ 108319baf839SRobert Olsson 108419baf839SRobert Olsson if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1085c95aaf9aSStephen Hemminger l = (struct leaf *) n; 108619baf839SRobert Olsson li = leaf_info_new(plen); 108719baf839SRobert Olsson 1088fea86ad8SStephen Hemminger if (!li) 1089fea86ad8SStephen Hemminger return NULL; 109019baf839SRobert Olsson 109119baf839SRobert Olsson fa_head = &li->falh; 109219baf839SRobert Olsson insert_leaf_info(&l->list, li); 109319baf839SRobert Olsson goto done; 109419baf839SRobert Olsson } 109519baf839SRobert Olsson l = leaf_new(); 109619baf839SRobert Olsson 1097fea86ad8SStephen Hemminger if (!l) 1098fea86ad8SStephen Hemminger return NULL; 109919baf839SRobert Olsson 110019baf839SRobert Olsson l->key = key; 110119baf839SRobert Olsson li = leaf_info_new(plen); 110219baf839SRobert Olsson 1103f835e471SRobert Olsson if (!li) { 1104387a5487SStephen Hemminger free_leaf(l); 1105fea86ad8SStephen Hemminger return NULL; 1106f835e471SRobert Olsson } 110719baf839SRobert Olsson 110819baf839SRobert Olsson fa_head = &li->falh; 110919baf839SRobert Olsson insert_leaf_info(&l->list, li); 111019baf839SRobert Olsson 111119baf839SRobert Olsson if (t->trie && n == NULL) { 111291b9a277SOlof Johansson /* Case 2: n is NULL, and will just insert a new leaf */ 111319baf839SRobert Olsson 1114b299e4f0SDavid S. Miller node_set_parent((struct rt_trie_node *)l, tp); 111519baf839SRobert Olsson 111619baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1117b299e4f0SDavid S. Miller put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); 111891b9a277SOlof Johansson } else { 111919baf839SRobert Olsson /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 112019baf839SRobert Olsson /* 112119baf839SRobert Olsson * Add a new tnode here 112219baf839SRobert Olsson * first tnode need some special handling 112319baf839SRobert Olsson */ 112419baf839SRobert Olsson 112519baf839SRobert Olsson if (tp) 112619baf839SRobert Olsson pos = tp->pos+tp->bits; 112719baf839SRobert Olsson else 112819baf839SRobert Olsson pos = 0; 112991b9a277SOlof Johansson 113019baf839SRobert Olsson if (n) { 113119baf839SRobert Olsson newpos = tkey_mismatch(key, pos, n->key); 113219baf839SRobert Olsson tn = tnode_new(n->key, newpos, 1); 113391b9a277SOlof Johansson } else { 113419baf839SRobert Olsson newpos = 0; 113519baf839SRobert Olsson tn = tnode_new(key, newpos, 1); /* First tnode */ 113619baf839SRobert Olsson } 1137f835e471SRobert Olsson 1138f835e471SRobert Olsson if (!tn) { 1139f835e471SRobert Olsson free_leaf_info(li); 1140387a5487SStephen Hemminger free_leaf(l); 1141fea86ad8SStephen Hemminger return NULL; 1142f835e471SRobert Olsson } 114319baf839SRobert Olsson 1144b299e4f0SDavid S. Miller node_set_parent((struct rt_trie_node *)tn, tp); 114519baf839SRobert Olsson 114619baf839SRobert Olsson missbit = tkey_extract_bits(key, newpos, 1); 1147b299e4f0SDavid S. Miller put_child(t, tn, missbit, (struct rt_trie_node *)l); 114819baf839SRobert Olsson put_child(t, tn, 1-missbit, n); 114919baf839SRobert Olsson 115019baf839SRobert Olsson if (tp) { 115119baf839SRobert Olsson cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1152a07f5f50SStephen Hemminger put_child(t, (struct tnode *)tp, cindex, 1153b299e4f0SDavid S. Miller (struct rt_trie_node *)tn); 115491b9a277SOlof Johansson } else { 1155b299e4f0SDavid S. Miller rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 115619baf839SRobert Olsson tp = tn; 115719baf839SRobert Olsson } 115819baf839SRobert Olsson } 115991b9a277SOlof Johansson 116091b9a277SOlof Johansson if (tp && tp->pos + tp->bits > 32) 1161a07f5f50SStephen Hemminger pr_warning("fib_trie" 1162a07f5f50SStephen Hemminger " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 116319baf839SRobert Olsson tp, tp->pos, tp->bits, key, plen); 116491b9a277SOlof Johansson 116519baf839SRobert Olsson /* Rebalance the trie */ 11662373ce1cSRobert Olsson 11677b85576dSJarek Poplawski trie_rebalance(t, tp); 1168f835e471SRobert Olsson done: 116919baf839SRobert Olsson return fa_head; 117019baf839SRobert Olsson } 117119baf839SRobert Olsson 1172d562f1f8SRobert Olsson /* 1173d562f1f8SRobert Olsson * Caller must hold RTNL. 1174d562f1f8SRobert Olsson */ 117516c6cf8bSStephen Hemminger int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 117619baf839SRobert Olsson { 117719baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 117819baf839SRobert Olsson struct fib_alias *fa, *new_fa; 117919baf839SRobert Olsson struct list_head *fa_head = NULL; 118019baf839SRobert Olsson struct fib_info *fi; 11814e902c57SThomas Graf int plen = cfg->fc_dst_len; 11824e902c57SThomas Graf u8 tos = cfg->fc_tos; 118319baf839SRobert Olsson u32 key, mask; 118419baf839SRobert Olsson int err; 118519baf839SRobert Olsson struct leaf *l; 118619baf839SRobert Olsson 118719baf839SRobert Olsson if (plen > 32) 118819baf839SRobert Olsson return -EINVAL; 118919baf839SRobert Olsson 11904e902c57SThomas Graf key = ntohl(cfg->fc_dst); 119119baf839SRobert Olsson 11922dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 119319baf839SRobert Olsson 119419baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 119519baf839SRobert Olsson 119619baf839SRobert Olsson if (key & ~mask) 119719baf839SRobert Olsson return -EINVAL; 119819baf839SRobert Olsson 119919baf839SRobert Olsson key = key & mask; 120019baf839SRobert Olsson 12014e902c57SThomas Graf fi = fib_create_info(cfg); 12024e902c57SThomas Graf if (IS_ERR(fi)) { 12034e902c57SThomas Graf err = PTR_ERR(fi); 120419baf839SRobert Olsson goto err; 12054e902c57SThomas Graf } 120619baf839SRobert Olsson 120719baf839SRobert Olsson l = fib_find_node(t, key); 120819baf839SRobert Olsson fa = NULL; 120919baf839SRobert Olsson 121019baf839SRobert Olsson if (l) { 121119baf839SRobert Olsson fa_head = get_fa_head(l, plen); 121219baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, fi->fib_priority); 121319baf839SRobert Olsson } 121419baf839SRobert Olsson 121519baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 121619baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 121719baf839SRobert Olsson * exists or to the node before which we will insert new one. 121819baf839SRobert Olsson * 121919baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 122019baf839SRobert Olsson * insert to the head of f. 122119baf839SRobert Olsson * 122219baf839SRobert Olsson * If f is NULL, no fib node matched the destination key 122319baf839SRobert Olsson * and we need to allocate a new one of those as well. 122419baf839SRobert Olsson */ 122519baf839SRobert Olsson 1226936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1227936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1228936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 122919baf839SRobert Olsson 123019baf839SRobert Olsson err = -EEXIST; 12314e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 123219baf839SRobert Olsson goto out; 123319baf839SRobert Olsson 1234936f6f8eSJulian Anastasov /* We have 2 goals: 1235936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1236936f6f8eSJulian Anastasov * duplicate routes 1237936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1238936f6f8eSJulian Anastasov */ 1239936f6f8eSJulian Anastasov fa_match = NULL; 1240936f6f8eSJulian Anastasov fa_first = fa; 1241936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1242936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 1243936f6f8eSJulian Anastasov if (fa->fa_tos != tos) 1244936f6f8eSJulian Anastasov break; 1245936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1246936f6f8eSJulian Anastasov break; 1247936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1248936f6f8eSJulian Anastasov fa->fa_scope == cfg->fc_scope && 1249936f6f8eSJulian Anastasov fa->fa_info == fi) { 1250936f6f8eSJulian Anastasov fa_match = fa; 1251936f6f8eSJulian Anastasov break; 1252936f6f8eSJulian Anastasov } 1253936f6f8eSJulian Anastasov } 1254936f6f8eSJulian Anastasov 12554e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 125619baf839SRobert Olsson struct fib_info *fi_drop; 125719baf839SRobert Olsson u8 state; 125819baf839SRobert Olsson 1259936f6f8eSJulian Anastasov fa = fa_first; 1260936f6f8eSJulian Anastasov if (fa_match) { 1261936f6f8eSJulian Anastasov if (fa == fa_match) 1262936f6f8eSJulian Anastasov err = 0; 12636725033fSJoonwoo Park goto out; 1264936f6f8eSJulian Anastasov } 12652373ce1cSRobert Olsson err = -ENOBUFS; 1266e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 12672373ce1cSRobert Olsson if (new_fa == NULL) 12682373ce1cSRobert Olsson goto out; 126919baf839SRobert Olsson 127019baf839SRobert Olsson fi_drop = fa->fa_info; 12712373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12722373ce1cSRobert Olsson new_fa->fa_info = fi; 12734e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 12744e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 127519baf839SRobert Olsson state = fa->fa_state; 1276936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 127719baf839SRobert Olsson 12782373ce1cSRobert Olsson list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12792373ce1cSRobert Olsson alias_free_mem_rcu(fa); 128019baf839SRobert Olsson 128119baf839SRobert Olsson fib_release_info(fi_drop); 128219baf839SRobert Olsson if (state & FA_S_ACCESSED) 128376e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 1284b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1285b8f55831SMilan Kocian tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); 128619baf839SRobert Olsson 128719baf839SRobert Olsson goto succeeded; 128819baf839SRobert Olsson } 128919baf839SRobert Olsson /* Error if we find a perfect match which 129019baf839SRobert Olsson * uses the same scope, type, and nexthop 129119baf839SRobert Olsson * information. 129219baf839SRobert Olsson */ 1293936f6f8eSJulian Anastasov if (fa_match) 129419baf839SRobert Olsson goto out; 1295a07f5f50SStephen Hemminger 12964e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1297936f6f8eSJulian Anastasov fa = fa_first; 129819baf839SRobert Olsson } 129919baf839SRobert Olsson err = -ENOENT; 13004e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 130119baf839SRobert Olsson goto out; 130219baf839SRobert Olsson 130319baf839SRobert Olsson err = -ENOBUFS; 1304e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 130519baf839SRobert Olsson if (new_fa == NULL) 130619baf839SRobert Olsson goto out; 130719baf839SRobert Olsson 130819baf839SRobert Olsson new_fa->fa_info = fi; 130919baf839SRobert Olsson new_fa->fa_tos = tos; 13104e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 13114e902c57SThomas Graf new_fa->fa_scope = cfg->fc_scope; 131219baf839SRobert Olsson new_fa->fa_state = 0; 131319baf839SRobert Olsson /* 131419baf839SRobert Olsson * Insert new entry to the list. 131519baf839SRobert Olsson */ 131619baf839SRobert Olsson 1317f835e471SRobert Olsson if (!fa_head) { 1318fea86ad8SStephen Hemminger fa_head = fib_insert_node(t, key, plen); 1319fea86ad8SStephen Hemminger if (unlikely(!fa_head)) { 1320fea86ad8SStephen Hemminger err = -ENOMEM; 1321f835e471SRobert Olsson goto out_free_new_fa; 1322f835e471SRobert Olsson } 1323fea86ad8SStephen Hemminger } 132419baf839SRobert Olsson 13252373ce1cSRobert Olsson list_add_tail_rcu(&new_fa->fa_list, 13262373ce1cSRobert Olsson (fa ? &fa->fa_list : fa_head)); 132719baf839SRobert Olsson 132876e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 13294e902c57SThomas Graf rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1330b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 133119baf839SRobert Olsson succeeded: 133219baf839SRobert Olsson return 0; 1333f835e471SRobert Olsson 1334f835e471SRobert Olsson out_free_new_fa: 1335f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 133619baf839SRobert Olsson out: 133719baf839SRobert Olsson fib_release_info(fi); 133891b9a277SOlof Johansson err: 133919baf839SRobert Olsson return err; 134019baf839SRobert Olsson } 134119baf839SRobert Olsson 1342772cb712SRobert Olsson /* should be called with rcu_read_lock */ 13435b470441SDavid S. Miller static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, 1344a07f5f50SStephen Hemminger t_key key, const struct flowi *flp, 1345ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 134619baf839SRobert Olsson { 134719baf839SRobert Olsson struct leaf_info *li; 134819baf839SRobert Olsson struct hlist_head *hhead = &l->list; 134919baf839SRobert Olsson struct hlist_node *node; 135019baf839SRobert Olsson 13512373ce1cSRobert Olsson hlist_for_each_entry_rcu(li, node, hhead, hlist) { 13523be0686bSDavid S. Miller struct fib_alias *fa; 1353a07f5f50SStephen Hemminger int plen = li->plen; 1354a07f5f50SStephen Hemminger __be32 mask = inet_make_mask(plen); 1355a07f5f50SStephen Hemminger 1356888454c5SAl Viro if (l->key != (key & ntohl(mask))) 135719baf839SRobert Olsson continue; 135819baf839SRobert Olsson 13593be0686bSDavid S. Miller list_for_each_entry_rcu(fa, &li->falh, fa_list) { 13603be0686bSDavid S. Miller struct fib_info *fi = fa->fa_info; 13613be0686bSDavid S. Miller int nhsel, err; 1362a07f5f50SStephen Hemminger 13633be0686bSDavid S. Miller if (fa->fa_tos && fa->fa_tos != flp->fl4_tos) 13643be0686bSDavid S. Miller continue; 13653be0686bSDavid S. Miller if (fa->fa_scope < flp->fl4_scope) 13663be0686bSDavid S. Miller continue; 13673be0686bSDavid S. Miller fib_alias_accessed(fa); 13683be0686bSDavid S. Miller err = fib_props[fa->fa_type].error; 13693be0686bSDavid S. Miller if (err) { 137019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 137119baf839SRobert Olsson t->stats.semantic_match_miss++; 137219baf839SRobert Olsson #endif 13733be0686bSDavid S. Miller return 1; 13743be0686bSDavid S. Miller } 13753be0686bSDavid S. Miller if (fi->fib_flags & RTNH_F_DEAD) 13763be0686bSDavid S. Miller continue; 13773be0686bSDavid S. Miller for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 13783be0686bSDavid S. Miller const struct fib_nh *nh = &fi->fib_nh[nhsel]; 13793be0686bSDavid S. Miller 13803be0686bSDavid S. Miller if (nh->nh_flags & RTNH_F_DEAD) 13813be0686bSDavid S. Miller continue; 13823be0686bSDavid S. Miller if (flp->oif && flp->oif != nh->nh_oif) 13833be0686bSDavid S. Miller continue; 13843be0686bSDavid S. Miller 13853be0686bSDavid S. Miller #ifdef CONFIG_IP_FIB_TRIE_STATS 13863be0686bSDavid S. Miller t->stats.semantic_match_passed++; 13873be0686bSDavid S. Miller #endif 13883be0686bSDavid S. Miller res->prefixlen = plen; 13893be0686bSDavid S. Miller res->nh_sel = nhsel; 13903be0686bSDavid S. Miller res->type = fa->fa_type; 13913be0686bSDavid S. Miller res->scope = fa->fa_scope; 13923be0686bSDavid S. Miller res->fi = fi; 13933be0686bSDavid S. Miller res->table = tb; 13943be0686bSDavid S. Miller res->fa_head = &li->falh; 13953be0686bSDavid S. Miller if (!(fib_flags & FIB_LOOKUP_NOREF)) 13963be0686bSDavid S. Miller atomic_inc(&res->fi->fib_clntref); 13973be0686bSDavid S. Miller return 0; 13983be0686bSDavid S. Miller } 13993be0686bSDavid S. Miller } 14003be0686bSDavid S. Miller 14013be0686bSDavid S. Miller #ifdef CONFIG_IP_FIB_TRIE_STATS 14023be0686bSDavid S. Miller t->stats.semantic_match_miss++; 14033be0686bSDavid S. Miller #endif 140419baf839SRobert Olsson } 140519baf839SRobert Olsson 14062e655571SBen Hutchings return 1; 1407a07f5f50SStephen Hemminger } 1408a07f5f50SStephen Hemminger 140916c6cf8bSStephen Hemminger int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, 1410ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 141119baf839SRobert Olsson { 141219baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 14132e655571SBen Hutchings int ret; 1414b299e4f0SDavid S. Miller struct rt_trie_node *n; 141519baf839SRobert Olsson struct tnode *pn; 14163b004569SDavid S. Miller unsigned int pos, bits; 141719baf839SRobert Olsson t_key key = ntohl(flp->fl4_dst); 14183b004569SDavid S. Miller unsigned int chopped_off; 141919baf839SRobert Olsson t_key cindex = 0; 14203b004569SDavid S. Miller unsigned int current_prefix_length = KEYLENGTH; 142191b9a277SOlof Johansson struct tnode *cn; 1422874ffa8fSEric Dumazet t_key pref_mismatch; 142391b9a277SOlof Johansson 14242373ce1cSRobert Olsson rcu_read_lock(); 142519baf839SRobert Olsson 14262373ce1cSRobert Olsson n = rcu_dereference(t->trie); 142719baf839SRobert Olsson if (!n) 142819baf839SRobert Olsson goto failed; 142919baf839SRobert Olsson 143019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 143119baf839SRobert Olsson t->stats.gets++; 143219baf839SRobert Olsson #endif 143319baf839SRobert Olsson 143419baf839SRobert Olsson /* Just a leaf? */ 143519baf839SRobert Olsson if (IS_LEAF(n)) { 14365b470441SDavid S. Miller ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); 1437a07f5f50SStephen Hemminger goto found; 143819baf839SRobert Olsson } 1439a07f5f50SStephen Hemminger 144019baf839SRobert Olsson pn = (struct tnode *) n; 144119baf839SRobert Olsson chopped_off = 0; 144219baf839SRobert Olsson 144319baf839SRobert Olsson while (pn) { 144419baf839SRobert Olsson pos = pn->pos; 144519baf839SRobert Olsson bits = pn->bits; 144619baf839SRobert Olsson 144719baf839SRobert Olsson if (!chopped_off) 1448ab66b4a7SStephen Hemminger cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1449ab66b4a7SStephen Hemminger pos, bits); 145019baf839SRobert Olsson 1451b902e573SJarek Poplawski n = tnode_get_child_rcu(pn, cindex); 145219baf839SRobert Olsson 145319baf839SRobert Olsson if (n == NULL) { 145419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 145519baf839SRobert Olsson t->stats.null_node_hit++; 145619baf839SRobert Olsson #endif 145719baf839SRobert Olsson goto backtrace; 145819baf839SRobert Olsson } 145919baf839SRobert Olsson 146091b9a277SOlof Johansson if (IS_LEAF(n)) { 14615b470441SDavid S. Miller ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); 14622e655571SBen Hutchings if (ret > 0) 146391b9a277SOlof Johansson goto backtrace; 1464a07f5f50SStephen Hemminger goto found; 146591b9a277SOlof Johansson } 146691b9a277SOlof Johansson 146791b9a277SOlof Johansson cn = (struct tnode *)n; 146819baf839SRobert Olsson 146919baf839SRobert Olsson /* 147019baf839SRobert Olsson * It's a tnode, and we can do some extra checks here if we 147119baf839SRobert Olsson * like, to avoid descending into a dead-end branch. 147219baf839SRobert Olsson * This tnode is in the parent's child array at index 147319baf839SRobert Olsson * key[p_pos..p_pos+p_bits] but potentially with some bits 147419baf839SRobert Olsson * chopped off, so in reality the index may be just a 147519baf839SRobert Olsson * subprefix, padded with zero at the end. 147619baf839SRobert Olsson * We can also take a look at any skipped bits in this 147719baf839SRobert Olsson * tnode - everything up to p_pos is supposed to be ok, 147819baf839SRobert Olsson * and the non-chopped bits of the index (se previous 147919baf839SRobert Olsson * paragraph) are also guaranteed ok, but the rest is 148019baf839SRobert Olsson * considered unknown. 148119baf839SRobert Olsson * 148219baf839SRobert Olsson * The skipped bits are key[pos+bits..cn->pos]. 148319baf839SRobert Olsson */ 148419baf839SRobert Olsson 148519baf839SRobert Olsson /* If current_prefix_length < pos+bits, we are already doing 148619baf839SRobert Olsson * actual prefix matching, which means everything from 148719baf839SRobert Olsson * pos+(bits-chopped_off) onward must be zero along some 148819baf839SRobert Olsson * branch of this subtree - otherwise there is *no* valid 148919baf839SRobert Olsson * prefix present. Here we can only check the skipped 149019baf839SRobert Olsson * bits. Remember, since we have already indexed into the 149119baf839SRobert Olsson * parent's child array, we know that the bits we chopped of 149219baf839SRobert Olsson * *are* zero. 149319baf839SRobert Olsson */ 149419baf839SRobert Olsson 1495a07f5f50SStephen Hemminger /* NOTA BENE: Checking only skipped bits 1496a07f5f50SStephen Hemminger for the new node here */ 149719baf839SRobert Olsson 149819baf839SRobert Olsson if (current_prefix_length < pos+bits) { 149919baf839SRobert Olsson if (tkey_extract_bits(cn->key, current_prefix_length, 1500a07f5f50SStephen Hemminger cn->pos - current_prefix_length) 1501a07f5f50SStephen Hemminger || !(cn->child[0])) 150219baf839SRobert Olsson goto backtrace; 150319baf839SRobert Olsson } 150419baf839SRobert Olsson 150519baf839SRobert Olsson /* 150619baf839SRobert Olsson * If chopped_off=0, the index is fully validated and we 150719baf839SRobert Olsson * only need to look at the skipped bits for this, the new, 150819baf839SRobert Olsson * tnode. What we actually want to do is to find out if 150919baf839SRobert Olsson * these skipped bits match our key perfectly, or if we will 151019baf839SRobert Olsson * have to count on finding a matching prefix further down, 151119baf839SRobert Olsson * because if we do, we would like to have some way of 151219baf839SRobert Olsson * verifying the existence of such a prefix at this point. 151319baf839SRobert Olsson */ 151419baf839SRobert Olsson 151519baf839SRobert Olsson /* The only thing we can do at this point is to verify that 151619baf839SRobert Olsson * any such matching prefix can indeed be a prefix to our 151719baf839SRobert Olsson * key, and if the bits in the node we are inspecting that 151819baf839SRobert Olsson * do not match our key are not ZERO, this cannot be true. 151919baf839SRobert Olsson * Thus, find out where there is a mismatch (before cn->pos) 152019baf839SRobert Olsson * and verify that all the mismatching bits are zero in the 152119baf839SRobert Olsson * new tnode's key. 152219baf839SRobert Olsson */ 152319baf839SRobert Olsson 1524a07f5f50SStephen Hemminger /* 1525a07f5f50SStephen Hemminger * Note: We aren't very concerned about the piece of 1526a07f5f50SStephen Hemminger * the key that precede pn->pos+pn->bits, since these 1527a07f5f50SStephen Hemminger * have already been checked. The bits after cn->pos 1528a07f5f50SStephen Hemminger * aren't checked since these are by definition 1529a07f5f50SStephen Hemminger * "unknown" at this point. Thus, what we want to see 1530a07f5f50SStephen Hemminger * is if we are about to enter the "prefix matching" 1531a07f5f50SStephen Hemminger * state, and in that case verify that the skipped 1532a07f5f50SStephen Hemminger * bits that will prevail throughout this subtree are 1533a07f5f50SStephen Hemminger * zero, as they have to be if we are to find a 1534a07f5f50SStephen Hemminger * matching prefix. 153519baf839SRobert Olsson */ 153619baf839SRobert Olsson 1537874ffa8fSEric Dumazet pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); 153819baf839SRobert Olsson 1539a07f5f50SStephen Hemminger /* 1540a07f5f50SStephen Hemminger * In short: If skipped bits in this node do not match 1541a07f5f50SStephen Hemminger * the search key, enter the "prefix matching" 1542a07f5f50SStephen Hemminger * state.directly. 154319baf839SRobert Olsson */ 154419baf839SRobert Olsson if (pref_mismatch) { 1545874ffa8fSEric Dumazet int mp = KEYLENGTH - fls(pref_mismatch); 154619baf839SRobert Olsson 1547874ffa8fSEric Dumazet if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) 154819baf839SRobert Olsson goto backtrace; 154919baf839SRobert Olsson 155019baf839SRobert Olsson if (current_prefix_length >= cn->pos) 155119baf839SRobert Olsson current_prefix_length = mp; 155219baf839SRobert Olsson } 1553a07f5f50SStephen Hemminger 155419baf839SRobert Olsson pn = (struct tnode *)n; /* Descend */ 155519baf839SRobert Olsson chopped_off = 0; 155619baf839SRobert Olsson continue; 155791b9a277SOlof Johansson 155819baf839SRobert Olsson backtrace: 155919baf839SRobert Olsson chopped_off++; 156019baf839SRobert Olsson 156119baf839SRobert Olsson /* As zero don't change the child key (cindex) */ 1562a07f5f50SStephen Hemminger while ((chopped_off <= pn->bits) 1563a07f5f50SStephen Hemminger && !(cindex & (1<<(chopped_off-1)))) 156419baf839SRobert Olsson chopped_off++; 156519baf839SRobert Olsson 156619baf839SRobert Olsson /* Decrease current_... with bits chopped off */ 156719baf839SRobert Olsson if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1568a07f5f50SStephen Hemminger current_prefix_length = pn->pos + pn->bits 1569a07f5f50SStephen Hemminger - chopped_off; 157019baf839SRobert Olsson 157119baf839SRobert Olsson /* 157219baf839SRobert Olsson * Either we do the actual chop off according or if we have 157319baf839SRobert Olsson * chopped off all bits in this tnode walk up to our parent. 157419baf839SRobert Olsson */ 157519baf839SRobert Olsson 157691b9a277SOlof Johansson if (chopped_off <= pn->bits) { 157719baf839SRobert Olsson cindex &= ~(1 << (chopped_off-1)); 157891b9a277SOlof Johansson } else { 1579b299e4f0SDavid S. Miller struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); 158006801916SStephen Hemminger if (!parent) 158119baf839SRobert Olsson goto failed; 158219baf839SRobert Olsson 158319baf839SRobert Olsson /* Get Child's index */ 158406801916SStephen Hemminger cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); 158506801916SStephen Hemminger pn = parent; 158619baf839SRobert Olsson chopped_off = 0; 158719baf839SRobert Olsson 158819baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 158919baf839SRobert Olsson t->stats.backtrack++; 159019baf839SRobert Olsson #endif 159119baf839SRobert Olsson goto backtrace; 159219baf839SRobert Olsson } 159319baf839SRobert Olsson } 159419baf839SRobert Olsson failed: 159519baf839SRobert Olsson ret = 1; 159619baf839SRobert Olsson found: 15972373ce1cSRobert Olsson rcu_read_unlock(); 159819baf839SRobert Olsson return ret; 159919baf839SRobert Olsson } 160019baf839SRobert Olsson 160119baf839SRobert Olsson /* 16029195bef7SStephen Hemminger * Remove the leaf and return parent. 160319baf839SRobert Olsson */ 16049195bef7SStephen Hemminger static void trie_leaf_remove(struct trie *t, struct leaf *l) 16059195bef7SStephen Hemminger { 1606b299e4f0SDavid S. Miller struct tnode *tp = node_parent((struct rt_trie_node *) l); 16079195bef7SStephen Hemminger 16089195bef7SStephen Hemminger pr_debug("entering trie_leaf_remove(%p)\n", l); 160919baf839SRobert Olsson 161019baf839SRobert Olsson if (tp) { 16119195bef7SStephen Hemminger t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); 161219baf839SRobert Olsson put_child(t, (struct tnode *)tp, cindex, NULL); 16137b85576dSJarek Poplawski trie_rebalance(t, tp); 161491b9a277SOlof Johansson } else 16152373ce1cSRobert Olsson rcu_assign_pointer(t->trie, NULL); 161619baf839SRobert Olsson 1617387a5487SStephen Hemminger free_leaf(l); 161819baf839SRobert Olsson } 161919baf839SRobert Olsson 1620d562f1f8SRobert Olsson /* 1621d562f1f8SRobert Olsson * Caller must hold RTNL. 1622d562f1f8SRobert Olsson */ 162316c6cf8bSStephen Hemminger int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) 162419baf839SRobert Olsson { 162519baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 162619baf839SRobert Olsson u32 key, mask; 16274e902c57SThomas Graf int plen = cfg->fc_dst_len; 16284e902c57SThomas Graf u8 tos = cfg->fc_tos; 162919baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 163019baf839SRobert Olsson struct list_head *fa_head; 163119baf839SRobert Olsson struct leaf *l; 163291b9a277SOlof Johansson struct leaf_info *li; 163391b9a277SOlof Johansson 163419baf839SRobert Olsson if (plen > 32) 163519baf839SRobert Olsson return -EINVAL; 163619baf839SRobert Olsson 16374e902c57SThomas Graf key = ntohl(cfg->fc_dst); 163819baf839SRobert Olsson mask = ntohl(inet_make_mask(plen)); 163919baf839SRobert Olsson 164019baf839SRobert Olsson if (key & ~mask) 164119baf839SRobert Olsson return -EINVAL; 164219baf839SRobert Olsson 164319baf839SRobert Olsson key = key & mask; 164419baf839SRobert Olsson l = fib_find_node(t, key); 164519baf839SRobert Olsson 164619baf839SRobert Olsson if (!l) 164719baf839SRobert Olsson return -ESRCH; 164819baf839SRobert Olsson 164919baf839SRobert Olsson fa_head = get_fa_head(l, plen); 165019baf839SRobert Olsson fa = fib_find_alias(fa_head, tos, 0); 165119baf839SRobert Olsson 165219baf839SRobert Olsson if (!fa) 165319baf839SRobert Olsson return -ESRCH; 165419baf839SRobert Olsson 16550c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 165619baf839SRobert Olsson 165719baf839SRobert Olsson fa_to_delete = NULL; 1658936f6f8eSJulian Anastasov fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1659936f6f8eSJulian Anastasov list_for_each_entry_continue(fa, fa_head, fa_list) { 166019baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 166119baf839SRobert Olsson 166219baf839SRobert Olsson if (fa->fa_tos != tos) 166319baf839SRobert Olsson break; 166419baf839SRobert Olsson 16654e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 16664e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 16674e902c57SThomas Graf fa->fa_scope == cfg->fc_scope) && 16684e902c57SThomas Graf (!cfg->fc_protocol || 16694e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 16704e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 167119baf839SRobert Olsson fa_to_delete = fa; 167219baf839SRobert Olsson break; 167319baf839SRobert Olsson } 167419baf839SRobert Olsson } 167519baf839SRobert Olsson 167691b9a277SOlof Johansson if (!fa_to_delete) 167791b9a277SOlof Johansson return -ESRCH; 167819baf839SRobert Olsson 167919baf839SRobert Olsson fa = fa_to_delete; 16804e902c57SThomas Graf rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1681b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 168219baf839SRobert Olsson 168319baf839SRobert Olsson l = fib_find_node(t, key); 1684772cb712SRobert Olsson li = find_leaf_info(l, plen); 168519baf839SRobert Olsson 16862373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 168719baf839SRobert Olsson 168819baf839SRobert Olsson if (list_empty(fa_head)) { 16892373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 169019baf839SRobert Olsson free_leaf_info(li); 16912373ce1cSRobert Olsson } 169219baf839SRobert Olsson 169319baf839SRobert Olsson if (hlist_empty(&l->list)) 16949195bef7SStephen Hemminger trie_leaf_remove(t, l); 169519baf839SRobert Olsson 169619baf839SRobert Olsson if (fa->fa_state & FA_S_ACCESSED) 169776e6ebfbSDenis V. Lunev rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 169819baf839SRobert Olsson 16992373ce1cSRobert Olsson fib_release_info(fa->fa_info); 17002373ce1cSRobert Olsson alias_free_mem_rcu(fa); 170119baf839SRobert Olsson return 0; 170219baf839SRobert Olsson } 170319baf839SRobert Olsson 1704ef3660ceSStephen Hemminger static int trie_flush_list(struct list_head *head) 170519baf839SRobert Olsson { 170619baf839SRobert Olsson struct fib_alias *fa, *fa_node; 170719baf839SRobert Olsson int found = 0; 170819baf839SRobert Olsson 170919baf839SRobert Olsson list_for_each_entry_safe(fa, fa_node, head, fa_list) { 171019baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 171119baf839SRobert Olsson 171219baf839SRobert Olsson if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 17132373ce1cSRobert Olsson list_del_rcu(&fa->fa_list); 17142373ce1cSRobert Olsson fib_release_info(fa->fa_info); 17152373ce1cSRobert Olsson alias_free_mem_rcu(fa); 171619baf839SRobert Olsson found++; 171719baf839SRobert Olsson } 171819baf839SRobert Olsson } 171919baf839SRobert Olsson return found; 172019baf839SRobert Olsson } 172119baf839SRobert Olsson 1722ef3660ceSStephen Hemminger static int trie_flush_leaf(struct leaf *l) 172319baf839SRobert Olsson { 172419baf839SRobert Olsson int found = 0; 172519baf839SRobert Olsson struct hlist_head *lih = &l->list; 172619baf839SRobert Olsson struct hlist_node *node, *tmp; 172719baf839SRobert Olsson struct leaf_info *li = NULL; 172819baf839SRobert Olsson 172919baf839SRobert Olsson hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1730ef3660ceSStephen Hemminger found += trie_flush_list(&li->falh); 173119baf839SRobert Olsson 173219baf839SRobert Olsson if (list_empty(&li->falh)) { 17332373ce1cSRobert Olsson hlist_del_rcu(&li->hlist); 173419baf839SRobert Olsson free_leaf_info(li); 173519baf839SRobert Olsson } 173619baf839SRobert Olsson } 173719baf839SRobert Olsson return found; 173819baf839SRobert Olsson } 173919baf839SRobert Olsson 174082cfbb00SStephen Hemminger /* 174182cfbb00SStephen Hemminger * Scan for the next right leaf starting at node p->child[idx] 174282cfbb00SStephen Hemminger * Since we have back pointer, no recursion necessary. 174382cfbb00SStephen Hemminger */ 1744b299e4f0SDavid S. Miller static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) 174519baf839SRobert Olsson { 174682cfbb00SStephen Hemminger do { 174782cfbb00SStephen Hemminger t_key idx; 174819baf839SRobert Olsson 174919baf839SRobert Olsson if (c) 175082cfbb00SStephen Hemminger idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; 175119baf839SRobert Olsson else 175282cfbb00SStephen Hemminger idx = 0; 175319baf839SRobert Olsson 175482cfbb00SStephen Hemminger while (idx < 1u << p->bits) { 175582cfbb00SStephen Hemminger c = tnode_get_child_rcu(p, idx++); 17562373ce1cSRobert Olsson if (!c) 175791b9a277SOlof Johansson continue; 175819baf839SRobert Olsson 175982cfbb00SStephen Hemminger if (IS_LEAF(c)) { 176082cfbb00SStephen Hemminger prefetch(p->child[idx]); 17612373ce1cSRobert Olsson return (struct leaf *) c; 176219baf839SRobert Olsson } 176382cfbb00SStephen Hemminger 176482cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 176582cfbb00SStephen Hemminger p = (struct tnode *) c; 176682cfbb00SStephen Hemminger idx = 0; 176719baf839SRobert Olsson } 176882cfbb00SStephen Hemminger 176982cfbb00SStephen Hemminger /* Node empty, walk back up to parent */ 1770b299e4f0SDavid S. Miller c = (struct rt_trie_node *) p; 177182cfbb00SStephen Hemminger } while ((p = node_parent_rcu(c)) != NULL); 177282cfbb00SStephen Hemminger 177382cfbb00SStephen Hemminger return NULL; /* Root of trie */ 177482cfbb00SStephen Hemminger } 177582cfbb00SStephen Hemminger 177682cfbb00SStephen Hemminger static struct leaf *trie_firstleaf(struct trie *t) 177782cfbb00SStephen Hemminger { 1778a034ee3cSEric Dumazet struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); 177982cfbb00SStephen Hemminger 178082cfbb00SStephen Hemminger if (!n) 178182cfbb00SStephen Hemminger return NULL; 178282cfbb00SStephen Hemminger 178382cfbb00SStephen Hemminger if (IS_LEAF(n)) /* trie is just a leaf */ 178482cfbb00SStephen Hemminger return (struct leaf *) n; 178582cfbb00SStephen Hemminger 178682cfbb00SStephen Hemminger return leaf_walk_rcu(n, NULL); 178782cfbb00SStephen Hemminger } 178882cfbb00SStephen Hemminger 178982cfbb00SStephen Hemminger static struct leaf *trie_nextleaf(struct leaf *l) 179082cfbb00SStephen Hemminger { 1791b299e4f0SDavid S. Miller struct rt_trie_node *c = (struct rt_trie_node *) l; 1792b902e573SJarek Poplawski struct tnode *p = node_parent_rcu(c); 179382cfbb00SStephen Hemminger 179482cfbb00SStephen Hemminger if (!p) 179582cfbb00SStephen Hemminger return NULL; /* trie with just one leaf */ 179682cfbb00SStephen Hemminger 179782cfbb00SStephen Hemminger return leaf_walk_rcu(p, c); 179819baf839SRobert Olsson } 179919baf839SRobert Olsson 180071d67e66SStephen Hemminger static struct leaf *trie_leafindex(struct trie *t, int index) 180171d67e66SStephen Hemminger { 180271d67e66SStephen Hemminger struct leaf *l = trie_firstleaf(t); 180371d67e66SStephen Hemminger 1804ec28cf73SStephen Hemminger while (l && index-- > 0) 180571d67e66SStephen Hemminger l = trie_nextleaf(l); 1806ec28cf73SStephen Hemminger 180771d67e66SStephen Hemminger return l; 180871d67e66SStephen Hemminger } 180971d67e66SStephen Hemminger 181071d67e66SStephen Hemminger 1811d562f1f8SRobert Olsson /* 1812d562f1f8SRobert Olsson * Caller must hold RTNL. 1813d562f1f8SRobert Olsson */ 181416c6cf8bSStephen Hemminger int fib_table_flush(struct fib_table *tb) 181519baf839SRobert Olsson { 181619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 18179195bef7SStephen Hemminger struct leaf *l, *ll = NULL; 181882cfbb00SStephen Hemminger int found = 0; 181919baf839SRobert Olsson 182082cfbb00SStephen Hemminger for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { 1821ef3660ceSStephen Hemminger found += trie_flush_leaf(l); 182219baf839SRobert Olsson 182319baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 18249195bef7SStephen Hemminger trie_leaf_remove(t, ll); 182519baf839SRobert Olsson ll = l; 182619baf839SRobert Olsson } 182719baf839SRobert Olsson 182819baf839SRobert Olsson if (ll && hlist_empty(&ll->list)) 18299195bef7SStephen Hemminger trie_leaf_remove(t, ll); 183019baf839SRobert Olsson 18310c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 183219baf839SRobert Olsson return found; 183319baf839SRobert Olsson } 183419baf839SRobert Olsson 18354aa2c466SPavel Emelyanov void fib_free_table(struct fib_table *tb) 18364aa2c466SPavel Emelyanov { 18374aa2c466SPavel Emelyanov kfree(tb); 18384aa2c466SPavel Emelyanov } 18394aa2c466SPavel Emelyanov 1840a07f5f50SStephen Hemminger static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1841a07f5f50SStephen Hemminger struct fib_table *tb, 184219baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 184319baf839SRobert Olsson { 184419baf839SRobert Olsson int i, s_i; 184519baf839SRobert Olsson struct fib_alias *fa; 184632ab5f80SAl Viro __be32 xkey = htonl(key); 184719baf839SRobert Olsson 184871d67e66SStephen Hemminger s_i = cb->args[5]; 184919baf839SRobert Olsson i = 0; 185019baf839SRobert Olsson 18512373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 18522373ce1cSRobert Olsson 18532373ce1cSRobert Olsson list_for_each_entry_rcu(fa, fah, fa_list) { 185419baf839SRobert Olsson if (i < s_i) { 185519baf839SRobert Olsson i++; 185619baf839SRobert Olsson continue; 185719baf839SRobert Olsson } 185819baf839SRobert Olsson 185919baf839SRobert Olsson if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 186019baf839SRobert Olsson cb->nlh->nlmsg_seq, 186119baf839SRobert Olsson RTM_NEWROUTE, 186219baf839SRobert Olsson tb->tb_id, 186319baf839SRobert Olsson fa->fa_type, 186419baf839SRobert Olsson fa->fa_scope, 1865be403ea1SThomas Graf xkey, 186619baf839SRobert Olsson plen, 186719baf839SRobert Olsson fa->fa_tos, 186864347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 186971d67e66SStephen Hemminger cb->args[5] = i; 187019baf839SRobert Olsson return -1; 187119baf839SRobert Olsson } 187219baf839SRobert Olsson i++; 187319baf839SRobert Olsson } 187471d67e66SStephen Hemminger cb->args[5] = i; 187519baf839SRobert Olsson return skb->len; 187619baf839SRobert Olsson } 187719baf839SRobert Olsson 1878a88ee229SStephen Hemminger static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, 1879a07f5f50SStephen Hemminger struct sk_buff *skb, struct netlink_callback *cb) 188019baf839SRobert Olsson { 1881a88ee229SStephen Hemminger struct leaf_info *li; 1882a88ee229SStephen Hemminger struct hlist_node *node; 1883a88ee229SStephen Hemminger int i, s_i; 188491b9a277SOlof Johansson 188571d67e66SStephen Hemminger s_i = cb->args[4]; 1886a88ee229SStephen Hemminger i = 0; 188719baf839SRobert Olsson 1888a88ee229SStephen Hemminger /* rcu_read_lock is hold by caller */ 1889a88ee229SStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 1890a88ee229SStephen Hemminger if (i < s_i) { 1891a88ee229SStephen Hemminger i++; 189219baf839SRobert Olsson continue; 1893a88ee229SStephen Hemminger } 189419baf839SRobert Olsson 1895a88ee229SStephen Hemminger if (i > s_i) 189671d67e66SStephen Hemminger cb->args[5] = 0; 189719baf839SRobert Olsson 1898a88ee229SStephen Hemminger if (list_empty(&li->falh)) 189919baf839SRobert Olsson continue; 190019baf839SRobert Olsson 1901a88ee229SStephen Hemminger if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { 190271d67e66SStephen Hemminger cb->args[4] = i; 190319baf839SRobert Olsson return -1; 190419baf839SRobert Olsson } 1905a88ee229SStephen Hemminger i++; 190619baf839SRobert Olsson } 1907a88ee229SStephen Hemminger 190871d67e66SStephen Hemminger cb->args[4] = i; 190919baf839SRobert Olsson return skb->len; 191019baf839SRobert Olsson } 191119baf839SRobert Olsson 191216c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1913a07f5f50SStephen Hemminger struct netlink_callback *cb) 191419baf839SRobert Olsson { 1915a88ee229SStephen Hemminger struct leaf *l; 191619baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 1917d5ce8a0eSStephen Hemminger t_key key = cb->args[2]; 191871d67e66SStephen Hemminger int count = cb->args[3]; 191919baf839SRobert Olsson 19202373ce1cSRobert Olsson rcu_read_lock(); 1921d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1922d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1923d5ce8a0eSStephen Hemminger */ 192471d67e66SStephen Hemminger if (count == 0) 1925d5ce8a0eSStephen Hemminger l = trie_firstleaf(t); 1926d5ce8a0eSStephen Hemminger else { 192771d67e66SStephen Hemminger /* Normally, continue from last key, but if that is missing 192871d67e66SStephen Hemminger * fallback to using slow rescan 1929d5ce8a0eSStephen Hemminger */ 193071d67e66SStephen Hemminger l = fib_find_node(t, key); 193171d67e66SStephen Hemminger if (!l) 193271d67e66SStephen Hemminger l = trie_leafindex(t, count); 193319baf839SRobert Olsson } 1934a88ee229SStephen Hemminger 1935d5ce8a0eSStephen Hemminger while (l) { 1936d5ce8a0eSStephen Hemminger cb->args[2] = l->key; 1937a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 193871d67e66SStephen Hemminger cb->args[3] = count; 19392373ce1cSRobert Olsson rcu_read_unlock(); 194019baf839SRobert Olsson return -1; 194119baf839SRobert Olsson } 1942d5ce8a0eSStephen Hemminger 194371d67e66SStephen Hemminger ++count; 1944d5ce8a0eSStephen Hemminger l = trie_nextleaf(l); 194571d67e66SStephen Hemminger memset(&cb->args[4], 0, 194671d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 1947a88ee229SStephen Hemminger } 194871d67e66SStephen Hemminger cb->args[3] = count; 1949a88ee229SStephen Hemminger rcu_read_unlock(); 1950a88ee229SStephen Hemminger 1951a88ee229SStephen Hemminger return skb->len; 1952a88ee229SStephen Hemminger } 195319baf839SRobert Olsson 19545348ba85SDavid S. Miller void __init fib_trie_init(void) 19557f9b8052SStephen Hemminger { 1956a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1957a07f5f50SStephen Hemminger sizeof(struct fib_alias), 1958bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 1959bc3c8c1eSStephen Hemminger 1960bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 1961bc3c8c1eSStephen Hemminger max(sizeof(struct leaf), 1962bc3c8c1eSStephen Hemminger sizeof(struct leaf_info)), 1963bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 19647f9b8052SStephen Hemminger } 196519baf839SRobert Olsson 19667f9b8052SStephen Hemminger 19675348ba85SDavid S. Miller struct fib_table *fib_trie_table(u32 id) 196819baf839SRobert Olsson { 196919baf839SRobert Olsson struct fib_table *tb; 197019baf839SRobert Olsson struct trie *t; 197119baf839SRobert Olsson 197219baf839SRobert Olsson tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 197319baf839SRobert Olsson GFP_KERNEL); 197419baf839SRobert Olsson if (tb == NULL) 197519baf839SRobert Olsson return NULL; 197619baf839SRobert Olsson 197719baf839SRobert Olsson tb->tb_id = id; 1978971b893eSDenis V. Lunev tb->tb_default = -1; 197919baf839SRobert Olsson 198019baf839SRobert Olsson t = (struct trie *) tb->tb_data; 1981c28a1cf4SStephen Hemminger memset(t, 0, sizeof(*t)); 198219baf839SRobert Olsson 198319baf839SRobert Olsson if (id == RT_TABLE_LOCAL) 1984a07f5f50SStephen Hemminger pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION); 198519baf839SRobert Olsson 198619baf839SRobert Olsson return tb; 198719baf839SRobert Olsson } 198819baf839SRobert Olsson 1989cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 1990cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 1991cb7b593cSStephen Hemminger struct fib_trie_iter { 19921c340b2fSDenis V. Lunev struct seq_net_private p; 19933d3b2d25SStephen Hemminger struct fib_table *tb; 1994cb7b593cSStephen Hemminger struct tnode *tnode; 1995a034ee3cSEric Dumazet unsigned int index; 1996a034ee3cSEric Dumazet unsigned int depth; 1997cb7b593cSStephen Hemminger }; 199819baf839SRobert Olsson 1999b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) 200019baf839SRobert Olsson { 2001cb7b593cSStephen Hemminger struct tnode *tn = iter->tnode; 2002a034ee3cSEric Dumazet unsigned int cindex = iter->index; 2003cb7b593cSStephen Hemminger struct tnode *p; 200419baf839SRobert Olsson 20056640e697SEric W. Biederman /* A single entry routing table */ 20066640e697SEric W. Biederman if (!tn) 20076640e697SEric W. Biederman return NULL; 20086640e697SEric W. Biederman 2009cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2010cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 2011cb7b593cSStephen Hemminger rescan: 2012cb7b593cSStephen Hemminger while (cindex < (1<<tn->bits)) { 2013b299e4f0SDavid S. Miller struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); 201419baf839SRobert Olsson 2015cb7b593cSStephen Hemminger if (n) { 201619baf839SRobert Olsson if (IS_LEAF(n)) { 2017cb7b593cSStephen Hemminger iter->tnode = tn; 2018cb7b593cSStephen Hemminger iter->index = cindex + 1; 201991b9a277SOlof Johansson } else { 2020cb7b593cSStephen Hemminger /* push down one level */ 2021cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2022cb7b593cSStephen Hemminger iter->index = 0; 2023cb7b593cSStephen Hemminger ++iter->depth; 202419baf839SRobert Olsson } 2025cb7b593cSStephen Hemminger return n; 202619baf839SRobert Olsson } 202719baf839SRobert Olsson 2028cb7b593cSStephen Hemminger ++cindex; 2029cb7b593cSStephen Hemminger } 2030cb7b593cSStephen Hemminger 2031cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 2032b299e4f0SDavid S. Miller p = node_parent_rcu((struct rt_trie_node *)tn); 2033cb7b593cSStephen Hemminger if (p) { 2034cb7b593cSStephen Hemminger cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2035cb7b593cSStephen Hemminger tn = p; 2036cb7b593cSStephen Hemminger --iter->depth; 2037cb7b593cSStephen Hemminger goto rescan; 2038cb7b593cSStephen Hemminger } 2039cb7b593cSStephen Hemminger 2040cb7b593cSStephen Hemminger /* got root? */ 2041cb7b593cSStephen Hemminger return NULL; 2042cb7b593cSStephen Hemminger } 2043cb7b593cSStephen Hemminger 2044b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, 2045cb7b593cSStephen Hemminger struct trie *t) 2046cb7b593cSStephen Hemminger { 2047b299e4f0SDavid S. Miller struct rt_trie_node *n; 20485ddf0eb2SRobert Olsson 20495ddf0eb2SRobert Olsson if (!t) 20505ddf0eb2SRobert Olsson return NULL; 20515ddf0eb2SRobert Olsson 20525ddf0eb2SRobert Olsson n = rcu_dereference(t->trie); 20533d3b2d25SStephen Hemminger if (!n) 20545ddf0eb2SRobert Olsson return NULL; 2055cb7b593cSStephen Hemminger 20566640e697SEric W. Biederman if (IS_TNODE(n)) { 2057cb7b593cSStephen Hemminger iter->tnode = (struct tnode *) n; 2058cb7b593cSStephen Hemminger iter->index = 0; 20591d25cd6cSRobert Olsson iter->depth = 1; 20606640e697SEric W. Biederman } else { 20616640e697SEric W. Biederman iter->tnode = NULL; 20626640e697SEric W. Biederman iter->index = 0; 20636640e697SEric W. Biederman iter->depth = 0; 20646640e697SEric W. Biederman } 20653d3b2d25SStephen Hemminger 2066cb7b593cSStephen Hemminger return n; 2067cb7b593cSStephen Hemminger } 2068cb7b593cSStephen Hemminger 2069cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 207019baf839SRobert Olsson { 2071b299e4f0SDavid S. Miller struct rt_trie_node *n; 2072cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2073cb7b593cSStephen Hemminger 2074cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 207519baf839SRobert Olsson 20762373ce1cSRobert Olsson rcu_read_lock(); 20773d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2078cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 207993672292SStephen Hemminger struct leaf *l = (struct leaf *)n; 208093672292SStephen Hemminger struct leaf_info *li; 208193672292SStephen Hemminger struct hlist_node *tmp; 208293672292SStephen Hemminger 208319baf839SRobert Olsson s->leaves++; 2084cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2085cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2086cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 208793672292SStephen Hemminger 208893672292SStephen Hemminger hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) 208993672292SStephen Hemminger ++s->prefixes; 209091b9a277SOlof Johansson } else { 2091cb7b593cSStephen Hemminger const struct tnode *tn = (const struct tnode *) n; 2092cb7b593cSStephen Hemminger int i; 209319baf839SRobert Olsson 209419baf839SRobert Olsson s->tnodes++; 209506ef921dSRobert Olsson if (tn->bits < MAX_STAT_DEPTH) 209619baf839SRobert Olsson s->nodesizes[tn->bits]++; 209706ef921dSRobert Olsson 2098cb7b593cSStephen Hemminger for (i = 0; i < (1<<tn->bits); i++) 2099cb7b593cSStephen Hemminger if (!tn->child[i]) 210019baf839SRobert Olsson s->nullpointers++; 210119baf839SRobert Olsson } 210219baf839SRobert Olsson } 21032373ce1cSRobert Olsson rcu_read_unlock(); 210419baf839SRobert Olsson } 210519baf839SRobert Olsson 210619baf839SRobert Olsson /* 210719baf839SRobert Olsson * This outputs /proc/net/fib_triestats 210819baf839SRobert Olsson */ 2109cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 211019baf839SRobert Olsson { 2111a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 211219baf839SRobert Olsson 211319baf839SRobert Olsson if (stat->leaves) 211419baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 211519baf839SRobert Olsson else 211619baf839SRobert Olsson avdepth = 0; 211719baf839SRobert Olsson 2118a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2119a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2120cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2121cb7b593cSStephen Hemminger 2122cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2123cb7b593cSStephen Hemminger bytes = sizeof(struct leaf) * stat->leaves; 212493672292SStephen Hemminger 212593672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 212693672292SStephen Hemminger bytes += sizeof(struct leaf_info) * stat->prefixes; 212793672292SStephen Hemminger 2128187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 212919baf839SRobert Olsson bytes += sizeof(struct tnode) * stat->tnodes; 213019baf839SRobert Olsson 213106ef921dSRobert Olsson max = MAX_STAT_DEPTH; 213206ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 213319baf839SRobert Olsson max--; 213419baf839SRobert Olsson 2135cb7b593cSStephen Hemminger pointers = 0; 213619baf839SRobert Olsson for (i = 1; i <= max; i++) 213719baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2138187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 213919baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 214019baf839SRobert Olsson } 2141cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2142187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2143cb7b593cSStephen Hemminger 2144b299e4f0SDavid S. Miller bytes += sizeof(struct rt_trie_node *) * pointers; 2145187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2146187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 214766a2f7fdSStephen Hemminger } 214819baf839SRobert Olsson 214919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 215066a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 215166a2f7fdSStephen Hemminger const struct trie_use_stats *stats) 215266a2f7fdSStephen Hemminger { 215366a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 215466a2f7fdSStephen Hemminger seq_printf(seq, "gets = %u\n", stats->gets); 215566a2f7fdSStephen Hemminger seq_printf(seq, "backtracks = %u\n", stats->backtrack); 2156a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 2157a07f5f50SStephen Hemminger stats->semantic_match_passed); 2158a07f5f50SStephen Hemminger seq_printf(seq, "semantic match miss = %u\n", 2159a07f5f50SStephen Hemminger stats->semantic_match_miss); 216066a2f7fdSStephen Hemminger seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); 2161a07f5f50SStephen Hemminger seq_printf(seq, "skipped node resize = %u\n\n", 2162a07f5f50SStephen Hemminger stats->resize_node_skipped); 216319baf839SRobert Olsson } 216466a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 216566a2f7fdSStephen Hemminger 21663d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2167d717a9a6SStephen Hemminger { 21683d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 21693d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 21703d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 21713d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 21723d3b2d25SStephen Hemminger else 21733d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2174d717a9a6SStephen Hemminger } 217519baf839SRobert Olsson 21763d3b2d25SStephen Hemminger 217719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 217819baf839SRobert Olsson { 21791c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 21803d3b2d25SStephen Hemminger unsigned int h; 2181877a9bffSEric W. Biederman 2182d717a9a6SStephen Hemminger seq_printf(seq, 2183a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2184a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 218519baf839SRobert Olsson sizeof(struct leaf), sizeof(struct tnode)); 218619baf839SRobert Olsson 21873d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 21883d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 21893d3b2d25SStephen Hemminger struct hlist_node *node; 21903d3b2d25SStephen Hemminger struct fib_table *tb; 2191cb7b593cSStephen Hemminger 21923d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 21933d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 21943d3b2d25SStephen Hemminger struct trie_stat stat; 21953d3b2d25SStephen Hemminger 21963d3b2d25SStephen Hemminger if (!t) 21973d3b2d25SStephen Hemminger continue; 21983d3b2d25SStephen Hemminger 21993d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22003d3b2d25SStephen Hemminger 22013d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22023d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22033d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22043d3b2d25SStephen Hemminger trie_show_usage(seq, &t->stats); 22053d3b2d25SStephen Hemminger #endif 22063d3b2d25SStephen Hemminger } 22073d3b2d25SStephen Hemminger } 2208cb7b593cSStephen Hemminger 220919baf839SRobert Olsson return 0; 221019baf839SRobert Olsson } 221119baf839SRobert Olsson 221219baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 221319baf839SRobert Olsson { 2214de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 22151c340b2fSDenis V. Lunev } 22161c340b2fSDenis V. Lunev 22179a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 221819baf839SRobert Olsson .owner = THIS_MODULE, 221919baf839SRobert Olsson .open = fib_triestat_seq_open, 222019baf839SRobert Olsson .read = seq_read, 222119baf839SRobert Olsson .llseek = seq_lseek, 2222b6fcbdb4SPavel Emelyanov .release = single_release_net, 222319baf839SRobert Olsson }; 222419baf839SRobert Olsson 2225b299e4f0SDavid S. Miller static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 222619baf839SRobert Olsson { 22271218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 22281218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2229cb7b593cSStephen Hemminger loff_t idx = 0; 22303d3b2d25SStephen Hemminger unsigned int h; 22313d3b2d25SStephen Hemminger 22323d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22333d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22343d3b2d25SStephen Hemminger struct hlist_node *node; 22353d3b2d25SStephen Hemminger struct fib_table *tb; 22363d3b2d25SStephen Hemminger 22373d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 2238b299e4f0SDavid S. Miller struct rt_trie_node *n; 2239cb7b593cSStephen Hemminger 22403d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 22413d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 22423d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 22433d3b2d25SStephen Hemminger if (pos == idx++) { 22443d3b2d25SStephen Hemminger iter->tb = tb; 2245cb7b593cSStephen Hemminger return n; 224619baf839SRobert Olsson } 22473d3b2d25SStephen Hemminger } 22483d3b2d25SStephen Hemminger } 224919baf839SRobert Olsson 225019baf839SRobert Olsson return NULL; 225119baf839SRobert Olsson } 225219baf839SRobert Olsson 225319baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2254c95aaf9aSStephen Hemminger __acquires(RCU) 225519baf839SRobert Olsson { 2256cb7b593cSStephen Hemminger rcu_read_lock(); 22571218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 225819baf839SRobert Olsson } 225919baf839SRobert Olsson 226019baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 226119baf839SRobert Olsson { 2262cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 22631218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 22643d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 22653d3b2d25SStephen Hemminger struct hlist_node *tb_node; 22663d3b2d25SStephen Hemminger unsigned int h; 2267b299e4f0SDavid S. Miller struct rt_trie_node *n; 2268cb7b593cSStephen Hemminger 226919baf839SRobert Olsson ++*pos; 22703d3b2d25SStephen Hemminger /* next node in same table */ 22713d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 22723d3b2d25SStephen Hemminger if (n) 22733d3b2d25SStephen Hemminger return n; 227491b9a277SOlof Johansson 22753d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 22763d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 22773d3b2d25SStephen Hemminger while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 22783d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 22793d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 22803d3b2d25SStephen Hemminger if (n) 22813d3b2d25SStephen Hemminger goto found; 22823d3b2d25SStephen Hemminger } 2283cb7b593cSStephen Hemminger 22843d3b2d25SStephen Hemminger /* new hash chain */ 22853d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 22863d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22873d3b2d25SStephen Hemminger hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { 22883d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 22893d3b2d25SStephen Hemminger if (n) 22903d3b2d25SStephen Hemminger goto found; 22913d3b2d25SStephen Hemminger } 22923d3b2d25SStephen Hemminger } 2293cb7b593cSStephen Hemminger return NULL; 22943d3b2d25SStephen Hemminger 22953d3b2d25SStephen Hemminger found: 22963d3b2d25SStephen Hemminger iter->tb = tb; 22973d3b2d25SStephen Hemminger return n; 229819baf839SRobert Olsson } 229919baf839SRobert Olsson 230019baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2301c95aaf9aSStephen Hemminger __releases(RCU) 230219baf839SRobert Olsson { 2303cb7b593cSStephen Hemminger rcu_read_unlock(); 230419baf839SRobert Olsson } 230519baf839SRobert Olsson 2306cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2307cb7b593cSStephen Hemminger { 2308a034ee3cSEric Dumazet while (n-- > 0) 2309a034ee3cSEric Dumazet seq_puts(seq, " "); 2310cb7b593cSStephen Hemminger } 231119baf839SRobert Olsson 231228d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2313cb7b593cSStephen Hemminger { 2314cb7b593cSStephen Hemminger switch (s) { 2315cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2316cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2317cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2318cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2319cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2320cb7b593cSStephen Hemminger default: 232128d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2322cb7b593cSStephen Hemminger return buf; 2323cb7b593cSStephen Hemminger } 2324cb7b593cSStephen Hemminger } 2325cb7b593cSStephen Hemminger 232636cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2327cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2328cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2329cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2330cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2331cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2332cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2333cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2334cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2335cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2336cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2337cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2338cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2339cb7b593cSStephen Hemminger }; 2340cb7b593cSStephen Hemminger 2341a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2342cb7b593cSStephen Hemminger { 2343cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2344cb7b593cSStephen Hemminger return rtn_type_names[t]; 234528d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2346cb7b593cSStephen Hemminger return buf; 2347cb7b593cSStephen Hemminger } 2348cb7b593cSStephen Hemminger 2349cb7b593cSStephen Hemminger /* Pretty print the trie */ 235019baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 235119baf839SRobert Olsson { 2352cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 2353b299e4f0SDavid S. Miller struct rt_trie_node *n = v; 235419baf839SRobert Olsson 23553d3b2d25SStephen Hemminger if (!node_parent_rcu(n)) 23563d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2357095b8501SRobert Olsson 2358095b8501SRobert Olsson if (IS_TNODE(n)) { 2359095b8501SRobert Olsson struct tnode *tn = (struct tnode *) n; 2360ab66b4a7SStephen Hemminger __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2361095b8501SRobert Olsson 23621d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2363673d57e7SHarvey Harrison seq_printf(seq, " +-- %pI4/%d %d %d %d\n", 2364673d57e7SHarvey Harrison &prf, tn->pos, tn->bits, tn->full_children, 23651d25cd6cSRobert Olsson tn->empty_children); 23661d25cd6cSRobert Olsson 2367cb7b593cSStephen Hemminger } else { 2368cb7b593cSStephen Hemminger struct leaf *l = (struct leaf *) n; 23691328042eSStephen Hemminger struct leaf_info *li; 23701328042eSStephen Hemminger struct hlist_node *node; 237132ab5f80SAl Viro __be32 val = htonl(l->key); 2372cb7b593cSStephen Hemminger 2373cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2374673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 237528d36e37SEric Dumazet 23761328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2377cb7b593cSStephen Hemminger struct fib_alias *fa; 237828d36e37SEric Dumazet 2379cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 238028d36e37SEric Dumazet char buf1[32], buf2[32]; 238128d36e37SEric Dumazet 2382cb7b593cSStephen Hemminger seq_indent(seq, iter->depth+1); 23831328042eSStephen Hemminger seq_printf(seq, " /%d %s %s", li->plen, 238428d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 238528d36e37SEric Dumazet fa->fa_scope), 238628d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 238728d36e37SEric Dumazet fa->fa_type)); 2388cb7b593cSStephen Hemminger if (fa->fa_tos) 2389b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2390cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2391cb7b593cSStephen Hemminger } 2392cb7b593cSStephen Hemminger } 2393cb7b593cSStephen Hemminger } 239419baf839SRobert Olsson 239519baf839SRobert Olsson return 0; 239619baf839SRobert Olsson } 239719baf839SRobert Olsson 2398f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 239919baf839SRobert Olsson .start = fib_trie_seq_start, 240019baf839SRobert Olsson .next = fib_trie_seq_next, 240119baf839SRobert Olsson .stop = fib_trie_seq_stop, 240219baf839SRobert Olsson .show = fib_trie_seq_show, 240319baf839SRobert Olsson }; 240419baf839SRobert Olsson 240519baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 240619baf839SRobert Olsson { 24071c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2408cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 240919baf839SRobert Olsson } 241019baf839SRobert Olsson 24119a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 241219baf839SRobert Olsson .owner = THIS_MODULE, 241319baf839SRobert Olsson .open = fib_trie_seq_open, 241419baf839SRobert Olsson .read = seq_read, 241519baf839SRobert Olsson .llseek = seq_lseek, 24161c340b2fSDenis V. Lunev .release = seq_release_net, 241719baf839SRobert Olsson }; 241819baf839SRobert Olsson 24198315f5d8SStephen Hemminger struct fib_route_iter { 24208315f5d8SStephen Hemminger struct seq_net_private p; 24218315f5d8SStephen Hemminger struct trie *main_trie; 24228315f5d8SStephen Hemminger loff_t pos; 24238315f5d8SStephen Hemminger t_key key; 24248315f5d8SStephen Hemminger }; 24258315f5d8SStephen Hemminger 24268315f5d8SStephen Hemminger static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) 24278315f5d8SStephen Hemminger { 24288315f5d8SStephen Hemminger struct leaf *l = NULL; 24298315f5d8SStephen Hemminger struct trie *t = iter->main_trie; 24308315f5d8SStephen Hemminger 24318315f5d8SStephen Hemminger /* use cache location of last found key */ 24328315f5d8SStephen Hemminger if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key))) 24338315f5d8SStephen Hemminger pos -= iter->pos; 24348315f5d8SStephen Hemminger else { 24358315f5d8SStephen Hemminger iter->pos = 0; 24368315f5d8SStephen Hemminger l = trie_firstleaf(t); 24378315f5d8SStephen Hemminger } 24388315f5d8SStephen Hemminger 24398315f5d8SStephen Hemminger while (l && pos-- > 0) { 24408315f5d8SStephen Hemminger iter->pos++; 24418315f5d8SStephen Hemminger l = trie_nextleaf(l); 24428315f5d8SStephen Hemminger } 24438315f5d8SStephen Hemminger 24448315f5d8SStephen Hemminger if (l) 24458315f5d8SStephen Hemminger iter->key = pos; /* remember it */ 24468315f5d8SStephen Hemminger else 24478315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 24488315f5d8SStephen Hemminger 24498315f5d8SStephen Hemminger return l; 24508315f5d8SStephen Hemminger } 24518315f5d8SStephen Hemminger 24528315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 24538315f5d8SStephen Hemminger __acquires(RCU) 24548315f5d8SStephen Hemminger { 24558315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 24568315f5d8SStephen Hemminger struct fib_table *tb; 24578315f5d8SStephen Hemminger 24588315f5d8SStephen Hemminger rcu_read_lock(); 24591218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 24608315f5d8SStephen Hemminger if (!tb) 24618315f5d8SStephen Hemminger return NULL; 24628315f5d8SStephen Hemminger 24638315f5d8SStephen Hemminger iter->main_trie = (struct trie *) tb->tb_data; 24648315f5d8SStephen Hemminger if (*pos == 0) 24658315f5d8SStephen Hemminger return SEQ_START_TOKEN; 24668315f5d8SStephen Hemminger else 24678315f5d8SStephen Hemminger return fib_route_get_idx(iter, *pos - 1); 24688315f5d8SStephen Hemminger } 24698315f5d8SStephen Hemminger 24708315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 24718315f5d8SStephen Hemminger { 24728315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 24738315f5d8SStephen Hemminger struct leaf *l = v; 24748315f5d8SStephen Hemminger 24758315f5d8SStephen Hemminger ++*pos; 24768315f5d8SStephen Hemminger if (v == SEQ_START_TOKEN) { 24778315f5d8SStephen Hemminger iter->pos = 0; 24788315f5d8SStephen Hemminger l = trie_firstleaf(iter->main_trie); 24798315f5d8SStephen Hemminger } else { 24808315f5d8SStephen Hemminger iter->pos++; 24818315f5d8SStephen Hemminger l = trie_nextleaf(l); 24828315f5d8SStephen Hemminger } 24838315f5d8SStephen Hemminger 24848315f5d8SStephen Hemminger if (l) 24858315f5d8SStephen Hemminger iter->key = l->key; 24868315f5d8SStephen Hemminger else 24878315f5d8SStephen Hemminger iter->pos = 0; 24888315f5d8SStephen Hemminger return l; 24898315f5d8SStephen Hemminger } 24908315f5d8SStephen Hemminger 24918315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 24928315f5d8SStephen Hemminger __releases(RCU) 24938315f5d8SStephen Hemminger { 24948315f5d8SStephen Hemminger rcu_read_unlock(); 24958315f5d8SStephen Hemminger } 24968315f5d8SStephen Hemminger 2497a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2498cb7b593cSStephen Hemminger { 2499a034ee3cSEric Dumazet unsigned int flags = 0; 2500cb7b593cSStephen Hemminger 2501a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2502a034ee3cSEric Dumazet flags = RTF_REJECT; 2503cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2504cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 250532ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2506cb7b593cSStephen Hemminger flags |= RTF_HOST; 2507cb7b593cSStephen Hemminger flags |= RTF_UP; 2508cb7b593cSStephen Hemminger return flags; 2509cb7b593cSStephen Hemminger } 2510cb7b593cSStephen Hemminger 2511cb7b593cSStephen Hemminger /* 2512cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2513cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2514cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2515cb7b593cSStephen Hemminger * legacy utilities 2516cb7b593cSStephen Hemminger */ 2517cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2518cb7b593cSStephen Hemminger { 2519cb7b593cSStephen Hemminger struct leaf *l = v; 25201328042eSStephen Hemminger struct leaf_info *li; 25211328042eSStephen Hemminger struct hlist_node *node; 2522cb7b593cSStephen Hemminger 2523cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2524cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2525cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2526cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2527cb7b593cSStephen Hemminger return 0; 2528cb7b593cSStephen Hemminger } 2529cb7b593cSStephen Hemminger 25301328042eSStephen Hemminger hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2531cb7b593cSStephen Hemminger struct fib_alias *fa; 253232ab5f80SAl Viro __be32 mask, prefix; 2533cb7b593cSStephen Hemminger 2534cb7b593cSStephen Hemminger mask = inet_make_mask(li->plen); 2535cb7b593cSStephen Hemminger prefix = htonl(l->key); 2536cb7b593cSStephen Hemminger 2537cb7b593cSStephen Hemminger list_for_each_entry_rcu(fa, &li->falh, fa_list) { 25381371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 2539a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 25405e659e4cSPavel Emelyanov int len; 2541cb7b593cSStephen Hemminger 2542cb7b593cSStephen Hemminger if (fa->fa_type == RTN_BROADCAST 2543cb7b593cSStephen Hemminger || fa->fa_type == RTN_MULTICAST) 2544cb7b593cSStephen Hemminger continue; 2545cb7b593cSStephen Hemminger 2546cb7b593cSStephen Hemminger if (fi) 25475e659e4cSPavel Emelyanov seq_printf(seq, 25485e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 25495e659e4cSPavel Emelyanov "%d\t%08X\t%d\t%u\t%u%n", 2550cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2551cb7b593cSStephen Hemminger prefix, 2552cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2553cb7b593cSStephen Hemminger fi->fib_priority, 2554cb7b593cSStephen Hemminger mask, 2555a07f5f50SStephen Hemminger (fi->fib_advmss ? 2556a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2557cb7b593cSStephen Hemminger fi->fib_window, 25585e659e4cSPavel Emelyanov fi->fib_rtt >> 3, &len); 2559cb7b593cSStephen Hemminger else 25605e659e4cSPavel Emelyanov seq_printf(seq, 25615e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 25625e659e4cSPavel Emelyanov "%d\t%08X\t%d\t%u\t%u%n", 2563cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 25645e659e4cSPavel Emelyanov mask, 0, 0, 0, &len); 2565cb7b593cSStephen Hemminger 25665e659e4cSPavel Emelyanov seq_printf(seq, "%*s\n", 127 - len, ""); 2567cb7b593cSStephen Hemminger } 2568cb7b593cSStephen Hemminger } 2569cb7b593cSStephen Hemminger 2570cb7b593cSStephen Hemminger return 0; 2571cb7b593cSStephen Hemminger } 2572cb7b593cSStephen Hemminger 2573f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 25748315f5d8SStephen Hemminger .start = fib_route_seq_start, 25758315f5d8SStephen Hemminger .next = fib_route_seq_next, 25768315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2577cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2578cb7b593cSStephen Hemminger }; 2579cb7b593cSStephen Hemminger 2580cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2581cb7b593cSStephen Hemminger { 25821c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 25838315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2584cb7b593cSStephen Hemminger } 2585cb7b593cSStephen Hemminger 25869a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2587cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2588cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2589cb7b593cSStephen Hemminger .read = seq_read, 2590cb7b593cSStephen Hemminger .llseek = seq_lseek, 25911c340b2fSDenis V. Lunev .release = seq_release_net, 2592cb7b593cSStephen Hemminger }; 2593cb7b593cSStephen Hemminger 259461a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 259519baf839SRobert Olsson { 259661a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2597cb7b593cSStephen Hemminger goto out1; 2598cb7b593cSStephen Hemminger 259961a02653SDenis V. Lunev if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, 260061a02653SDenis V. Lunev &fib_triestat_fops)) 2601cb7b593cSStephen Hemminger goto out2; 2602cb7b593cSStephen Hemminger 260361a02653SDenis V. Lunev if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) 2604cb7b593cSStephen Hemminger goto out3; 2605cb7b593cSStephen Hemminger 260619baf839SRobert Olsson return 0; 2607cb7b593cSStephen Hemminger 2608cb7b593cSStephen Hemminger out3: 260961a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 2610cb7b593cSStephen Hemminger out2: 261161a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 2612cb7b593cSStephen Hemminger out1: 2613cb7b593cSStephen Hemminger return -ENOMEM; 261419baf839SRobert Olsson } 261519baf839SRobert Olsson 261661a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 261719baf839SRobert Olsson { 261861a02653SDenis V. Lunev proc_net_remove(net, "fib_trie"); 261961a02653SDenis V. Lunev proc_net_remove(net, "fib_triestat"); 262061a02653SDenis V. Lunev proc_net_remove(net, "route"); 262119baf839SRobert Olsson } 262219baf839SRobert Olsson 262319baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2624