119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 541977f032SJiri Slaby #include <linux/bitops.h> 5519baf839SRobert Olsson #include <linux/types.h> 5619baf839SRobert Olsson #include <linux/kernel.h> 5719baf839SRobert Olsson #include <linux/mm.h> 5819baf839SRobert Olsson #include <linux/string.h> 5919baf839SRobert Olsson #include <linux/socket.h> 6019baf839SRobert Olsson #include <linux/sockios.h> 6119baf839SRobert Olsson #include <linux/errno.h> 6219baf839SRobert Olsson #include <linux/in.h> 6319baf839SRobert Olsson #include <linux/inet.h> 64cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6519baf839SRobert Olsson #include <linux/netdevice.h> 6619baf839SRobert Olsson #include <linux/if_arp.h> 6719baf839SRobert Olsson #include <linux/proc_fs.h> 682373ce1cSRobert Olsson #include <linux/rcupdate.h> 6919baf839SRobert Olsson #include <linux/skbuff.h> 7019baf839SRobert Olsson #include <linux/netlink.h> 7119baf839SRobert Olsson #include <linux/init.h> 7219baf839SRobert Olsson #include <linux/list.h> 735a0e3ad6STejun Heo #include <linux/slab.h> 74bc3b2d7fSPaul Gortmaker #include <linux/export.h> 75ffa915d0SDavid S. Miller #include <linux/vmalloc.h> 76b90eb754SJiri Pirko #include <linux/notifier.h> 77457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7819baf839SRobert Olsson #include <net/ip.h> 7919baf839SRobert Olsson #include <net/protocol.h> 8019baf839SRobert Olsson #include <net/route.h> 8119baf839SRobert Olsson #include <net/tcp.h> 8219baf839SRobert Olsson #include <net/sock.h> 8319baf839SRobert Olsson #include <net/ip_fib.h> 84f6d3c192SDavid Ahern #include <trace/events/fib.h> 8519baf839SRobert Olsson #include "fib_lookup.h" 8619baf839SRobert Olsson 87b90eb754SJiri Pirko static BLOCKING_NOTIFIER_HEAD(fib_chain); 88b90eb754SJiri Pirko 89b90eb754SJiri Pirko int register_fib_notifier(struct notifier_block *nb) 90b90eb754SJiri Pirko { 91b90eb754SJiri Pirko return blocking_notifier_chain_register(&fib_chain, nb); 92b90eb754SJiri Pirko } 93b90eb754SJiri Pirko EXPORT_SYMBOL(register_fib_notifier); 94b90eb754SJiri Pirko 95b90eb754SJiri Pirko int unregister_fib_notifier(struct notifier_block *nb) 96b90eb754SJiri Pirko { 97b90eb754SJiri Pirko return blocking_notifier_chain_unregister(&fib_chain, nb); 98b90eb754SJiri Pirko } 99b90eb754SJiri Pirko EXPORT_SYMBOL(unregister_fib_notifier); 100b90eb754SJiri Pirko 101b90eb754SJiri Pirko int call_fib_notifiers(struct net *net, enum fib_event_type event_type, 102b90eb754SJiri Pirko struct fib_notifier_info *info) 103b90eb754SJiri Pirko { 104b90eb754SJiri Pirko info->net = net; 105b90eb754SJiri Pirko return blocking_notifier_call_chain(&fib_chain, event_type, info); 106b90eb754SJiri Pirko } 107b90eb754SJiri Pirko 108b90eb754SJiri Pirko static int call_fib_entry_notifiers(struct net *net, 109b90eb754SJiri Pirko enum fib_event_type event_type, u32 dst, 110b90eb754SJiri Pirko int dst_len, struct fib_info *fi, 111b90eb754SJiri Pirko u8 tos, u8 type, u32 tb_id, u32 nlflags) 112b90eb754SJiri Pirko { 113b90eb754SJiri Pirko struct fib_entry_notifier_info info = { 114b90eb754SJiri Pirko .dst = dst, 115b90eb754SJiri Pirko .dst_len = dst_len, 116b90eb754SJiri Pirko .fi = fi, 117b90eb754SJiri Pirko .tos = tos, 118b90eb754SJiri Pirko .type = type, 119b90eb754SJiri Pirko .tb_id = tb_id, 120b90eb754SJiri Pirko .nlflags = nlflags, 121b90eb754SJiri Pirko }; 122b90eb754SJiri Pirko return call_fib_notifiers(net, event_type, &info.info); 123b90eb754SJiri Pirko } 124b90eb754SJiri Pirko 12506ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 12619baf839SRobert Olsson 12719baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 12895f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 12919baf839SRobert Olsson 13019baf839SRobert Olsson typedef unsigned int t_key; 13119baf839SRobert Olsson 13288bae714SAlexander Duyck #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) 13364c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 13464c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 1352373ce1cSRobert Olsson 13635c6edacSAlexander Duyck struct key_vector { 13741b489fdSAlexander Duyck t_key key; 13841b489fdSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 13941b489fdSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 14041b489fdSAlexander Duyck unsigned char slen; 14141b489fdSAlexander Duyck union { 14241b489fdSAlexander Duyck /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ 14379e5ad2cSAlexander Duyck struct hlist_head leaf; 14441b489fdSAlexander Duyck /* This array is valid if (pos | bits) > 0 (TNODE) */ 14535c6edacSAlexander Duyck struct key_vector __rcu *tnode[0]; 14619baf839SRobert Olsson }; 147adaf9816SAlexander Duyck }; 14819baf839SRobert Olsson 149dc35dbedSAlexander Duyck struct tnode { 15056ca2adfSAlexander Duyck struct rcu_head rcu; 1516e22d174SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 1526e22d174SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 153f23e59fbSAlexander Duyck struct key_vector __rcu *parent; 154dc35dbedSAlexander Duyck struct key_vector kv[1]; 15556ca2adfSAlexander Duyck #define tn_bits kv[0].bits 156dc35dbedSAlexander Duyck }; 157dc35dbedSAlexander Duyck 158dc35dbedSAlexander Duyck #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) 15941b489fdSAlexander Duyck #define LEAF_SIZE TNODE_SIZE(1) 16041b489fdSAlexander Duyck 16119baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 16219baf839SRobert Olsson struct trie_use_stats { 16319baf839SRobert Olsson unsigned int gets; 16419baf839SRobert Olsson unsigned int backtrack; 16519baf839SRobert Olsson unsigned int semantic_match_passed; 16619baf839SRobert Olsson unsigned int semantic_match_miss; 16719baf839SRobert Olsson unsigned int null_node_hit; 1682f36895aSRobert Olsson unsigned int resize_node_skipped; 16919baf839SRobert Olsson }; 17019baf839SRobert Olsson #endif 17119baf839SRobert Olsson 17219baf839SRobert Olsson struct trie_stat { 17319baf839SRobert Olsson unsigned int totdepth; 17419baf839SRobert Olsson unsigned int maxdepth; 17519baf839SRobert Olsson unsigned int tnodes; 17619baf839SRobert Olsson unsigned int leaves; 17719baf839SRobert Olsson unsigned int nullpointers; 17893672292SStephen Hemminger unsigned int prefixes; 17906ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 18019baf839SRobert Olsson }; 18119baf839SRobert Olsson 18219baf839SRobert Olsson struct trie { 18388bae714SAlexander Duyck struct key_vector kv[1]; 18419baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1858274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 18619baf839SRobert Olsson #endif 18719baf839SRobert Olsson }; 18819baf839SRobert Olsson 18988bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn); 190c3059477SJarek Poplawski static size_t tnode_free_size; 191c3059477SJarek Poplawski 192c3059477SJarek Poplawski /* 193c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 194c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 195c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 196c3059477SJarek Poplawski */ 197c3059477SJarek Poplawski static const int sync_pages = 128; 19819baf839SRobert Olsson 199e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 200bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 20119baf839SRobert Olsson 20256ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv) 20356ca2adfSAlexander Duyck { 20456ca2adfSAlexander Duyck return container_of(kv, struct tnode, kv[0]); 20556ca2adfSAlexander Duyck } 20656ca2adfSAlexander Duyck 20764c9b6fbSAlexander Duyck /* caller must hold RTNL */ 208f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) 209754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) 2100a5c0475SEric Dumazet 21164c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 212f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) 213754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) 2140a5c0475SEric Dumazet 21564c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 21635c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) 21706801916SStephen Hemminger { 218adaf9816SAlexander Duyck if (n) 219f23e59fbSAlexander Duyck rcu_assign_pointer(tn_info(n)->parent, tp); 22064c9b6fbSAlexander Duyck } 22164c9b6fbSAlexander Duyck 222f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) 22364c9b6fbSAlexander Duyck 22464c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 22564c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 22664c9b6fbSAlexander Duyck */ 2272e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn) 22864c9b6fbSAlexander Duyck { 22964c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 23006801916SStephen Hemminger } 2312373ce1cSRobert Olsson 23288bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) 23388bae714SAlexander Duyck 2342e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv) 2352e1ac88aSAlexander Duyck { 2362e1ac88aSAlexander Duyck unsigned long index = key ^ kv->key; 2372e1ac88aSAlexander Duyck 23888bae714SAlexander Duyck if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) 23988bae714SAlexander Duyck return 0; 24088bae714SAlexander Duyck 2412e1ac88aSAlexander Duyck return index >> kv->pos; 2422e1ac88aSAlexander Duyck } 2432e1ac88aSAlexander Duyck 244e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 245e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 246e9b44019SAlexander Duyck * all of the bits in that key are significant. 247e9b44019SAlexander Duyck * 248e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 249e9b44019SAlexander Duyck * 250e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 251e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 252e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 253e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 254e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 255e9b44019SAlexander Duyck * correct key path. 256e9b44019SAlexander Duyck * 257e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 258e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 259e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 260e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 261e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 262e9b44019SAlexander Duyck * 263e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 264e9b44019SAlexander Duyck * have many different meanings. 265e9b44019SAlexander Duyck * 266e9b44019SAlexander Duyck * Example: 267e9b44019SAlexander Duyck * _________________________________________________________________ 268e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 269e9b44019SAlexander Duyck * ----------------------------------------------------------------- 270e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 271e9b44019SAlexander Duyck * 272e9b44019SAlexander Duyck * _________________________________________________________________ 273e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 274e9b44019SAlexander Duyck * ----------------------------------------------------------------- 275e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 276e9b44019SAlexander Duyck * 277e9b44019SAlexander Duyck * tp->pos = 22 278e9b44019SAlexander Duyck * tp->bits = 3 279e9b44019SAlexander Duyck * n->pos = 13 280e9b44019SAlexander Duyck * n->bits = 4 281e9b44019SAlexander Duyck * 282e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 283e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 284e9b44019SAlexander Duyck * point we do not use them for anything. 285e9b44019SAlexander Duyck * 286e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 287e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 288e9b44019SAlexander Duyck * 'n' among tp's children. 289e9b44019SAlexander Duyck * 29098a384ecSXunlei Pang * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits 291e9b44019SAlexander Duyck * for the node n. 292e9b44019SAlexander Duyck * 293e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 294e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 295e9b44019SAlexander Duyck * 296e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 297e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 298e9b44019SAlexander Duyck * 29998a384ecSXunlei Pang * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown 300e9b44019SAlexander Duyck * at this point. 30119baf839SRobert Olsson */ 30219baf839SRobert Olsson 303f5026fabSDenis V. Lunev static const int halve_threshold = 25; 304f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 305345aa031SJarek Poplawski static const int halve_threshold_root = 15; 30680b71b80SJens Låås static const int inflate_threshold_root = 30; 3072373ce1cSRobert Olsson 3082373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3092373ce1cSRobert Olsson { 3102373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3112373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3122373ce1cSRobert Olsson } 3132373ce1cSRobert Olsson 3142373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3152373ce1cSRobert Olsson { 3162373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3172373ce1cSRobert Olsson } 3182373ce1cSRobert Olsson 31937fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 32035c6edacSAlexander Duyck ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 3211de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \ 32235c6edacSAlexander Duyck ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 32337fd30f2SAlexander Duyck 32437fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 3252373ce1cSRobert Olsson { 32656ca2adfSAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 32737fd30f2SAlexander Duyck 32856ca2adfSAlexander Duyck if (!n->tn_bits) 32937fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 33037fd30f2SAlexander Duyck else 3311d5cfdb0STetsuo Handa kvfree(n); 3322373ce1cSRobert Olsson } 3332373ce1cSRobert Olsson 33456ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 335387a5487SStephen Hemminger 336dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits) 3372373ce1cSRobert Olsson { 3381de3d87bSAlexander Duyck size_t size; 3391de3d87bSAlexander Duyck 3401de3d87bSAlexander Duyck /* verify bits is within bounds */ 3411de3d87bSAlexander Duyck if (bits > TNODE_VMALLOC_MAX) 3421de3d87bSAlexander Duyck return NULL; 3431de3d87bSAlexander Duyck 3441de3d87bSAlexander Duyck /* determine size and verify it is non-zero and didn't overflow */ 3451de3d87bSAlexander Duyck size = TNODE_SIZE(1ul << bits); 3461de3d87bSAlexander Duyck 3472373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3488d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 34915be75cdSStephen Hemminger else 3507a1c8e5aSEric Dumazet return vzalloc(size); 35115be75cdSStephen Hemminger } 3522373ce1cSRobert Olsson 35335c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n) 35495f60ea3SAlexander Duyck { 3556e22d174SAlexander Duyck ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; 35695f60ea3SAlexander Duyck } 35795f60ea3SAlexander Duyck 35835c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n) 35995f60ea3SAlexander Duyck { 3606e22d174SAlexander Duyck tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; 36195f60ea3SAlexander Duyck } 36295f60ea3SAlexander Duyck 36335c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 36419baf839SRobert Olsson { 365f38b24c9SFiro Yang struct key_vector *l; 366f38b24c9SFiro Yang struct tnode *kv; 367dc35dbedSAlexander Duyck 368f38b24c9SFiro Yang kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 369dc35dbedSAlexander Duyck if (!kv) 370dc35dbedSAlexander Duyck return NULL; 371dc35dbedSAlexander Duyck 372dc35dbedSAlexander Duyck /* initialize key vector */ 373f38b24c9SFiro Yang l = kv->kv; 37464c9b6fbSAlexander Duyck l->key = key; 375e9b44019SAlexander Duyck l->pos = 0; 37664c9b6fbSAlexander Duyck l->bits = 0; 377dc35dbedSAlexander Duyck l->slen = fa->fa_slen; 37864c9b6fbSAlexander Duyck 379d5d6487cSAlexander Duyck /* link leaf to fib alias */ 38079e5ad2cSAlexander Duyck INIT_HLIST_HEAD(&l->leaf); 381d5d6487cSAlexander Duyck hlist_add_head(&fa->fa_list, &l->leaf); 382dc35dbedSAlexander Duyck 38319baf839SRobert Olsson return l; 38419baf839SRobert Olsson } 38519baf839SRobert Olsson 38635c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits) 38719baf839SRobert Olsson { 38864c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 389f38b24c9SFiro Yang struct key_vector *tn; 390f38b24c9SFiro Yang struct tnode *tnode; 39164c9b6fbSAlexander Duyck 39264c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 39364c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 39419baf839SRobert Olsson 395f38b24c9SFiro Yang tnode = tnode_alloc(bits); 396dc35dbedSAlexander Duyck if (!tnode) 397dc35dbedSAlexander Duyck return NULL; 398dc35dbedSAlexander Duyck 399f38b24c9SFiro Yang pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 400f38b24c9SFiro Yang sizeof(struct key_vector *) << bits); 401f38b24c9SFiro Yang 40295f60ea3SAlexander Duyck if (bits == KEYLENGTH) 4036e22d174SAlexander Duyck tnode->full_children = 1; 40495f60ea3SAlexander Duyck else 4056e22d174SAlexander Duyck tnode->empty_children = 1ul << bits; 406c877efb2SStephen Hemminger 407f38b24c9SFiro Yang tn = tnode->kv; 408dc35dbedSAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 409dc35dbedSAlexander Duyck tn->pos = pos; 410dc35dbedSAlexander Duyck tn->bits = bits; 411dc35dbedSAlexander Duyck tn->slen = pos; 412dc35dbedSAlexander Duyck 41319baf839SRobert Olsson return tn; 41419baf839SRobert Olsson } 41519baf839SRobert Olsson 416e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 41719baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 41819baf839SRobert Olsson */ 41935c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n) 42019baf839SRobert Olsson { 421e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 42219baf839SRobert Olsson } 42319baf839SRobert Olsson 424ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 42519baf839SRobert Olsson * Update the value of full_children and empty_children. 42619baf839SRobert Olsson */ 42735c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i, 42835c6edacSAlexander Duyck struct key_vector *n) 42919baf839SRobert Olsson { 430754baf8dSAlexander Duyck struct key_vector *chi = get_child(tn, i); 431ff181ed8SAlexander Duyck int isfull, wasfull; 43219baf839SRobert Olsson 4332e1ac88aSAlexander Duyck BUG_ON(i >= child_length(tn)); 4340c7770c7SStephen Hemminger 43595f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 43600db4124SIan Morris if (!n && chi) 43795f60ea3SAlexander Duyck empty_child_inc(tn); 43800db4124SIan Morris if (n && !chi) 43995f60ea3SAlexander Duyck empty_child_dec(tn); 44019baf839SRobert Olsson 44119baf839SRobert Olsson /* update fullChildren */ 44219baf839SRobert Olsson wasfull = tnode_full(tn, chi); 44319baf839SRobert Olsson isfull = tnode_full(tn, n); 444ff181ed8SAlexander Duyck 44519baf839SRobert Olsson if (wasfull && !isfull) 4466e22d174SAlexander Duyck tn_info(tn)->full_children--; 44719baf839SRobert Olsson else if (!wasfull && isfull) 4486e22d174SAlexander Duyck tn_info(tn)->full_children++; 44991b9a277SOlof Johansson 4505405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4515405afd1SAlexander Duyck tn->slen = n->slen; 4525405afd1SAlexander Duyck 45341b489fdSAlexander Duyck rcu_assign_pointer(tn->tnode[i], n); 45419baf839SRobert Olsson } 45519baf839SRobert Olsson 45635c6edacSAlexander Duyck static void update_children(struct key_vector *tn) 45769fa57b1SAlexander Duyck { 45869fa57b1SAlexander Duyck unsigned long i; 45969fa57b1SAlexander Duyck 46069fa57b1SAlexander Duyck /* update all of the child parent pointers */ 4612e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 462754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 46369fa57b1SAlexander Duyck 46469fa57b1SAlexander Duyck if (!inode) 46569fa57b1SAlexander Duyck continue; 46669fa57b1SAlexander Duyck 46769fa57b1SAlexander Duyck /* Either update the children of a tnode that 46869fa57b1SAlexander Duyck * already belongs to us or update the child 46969fa57b1SAlexander Duyck * to point to ourselves. 47069fa57b1SAlexander Duyck */ 47169fa57b1SAlexander Duyck if (node_parent(inode) == tn) 47269fa57b1SAlexander Duyck update_children(inode); 47369fa57b1SAlexander Duyck else 47469fa57b1SAlexander Duyck node_set_parent(inode, tn); 47569fa57b1SAlexander Duyck } 47669fa57b1SAlexander Duyck } 47769fa57b1SAlexander Duyck 47888bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key, 47988bae714SAlexander Duyck struct key_vector *n) 480836a0123SAlexander Duyck { 48188bae714SAlexander Duyck if (IS_TRIE(tp)) 48288bae714SAlexander Duyck rcu_assign_pointer(tp->tnode[0], n); 483836a0123SAlexander Duyck else 48488bae714SAlexander Duyck put_child(tp, get_index(key, tp), n); 485836a0123SAlexander Duyck } 486836a0123SAlexander Duyck 48735c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn) 4880a5c0475SEric Dumazet { 48956ca2adfSAlexander Duyck tn_info(tn)->rcu.next = NULL; 4900a5c0475SEric Dumazet } 491fc86a93bSAlexander Duyck 49235c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn, 49335c6edacSAlexander Duyck struct key_vector *n) 494fc86a93bSAlexander Duyck { 49556ca2adfSAlexander Duyck tn_info(n)->rcu.next = tn_info(tn)->rcu.next; 49656ca2adfSAlexander Duyck tn_info(tn)->rcu.next = &tn_info(n)->rcu; 497fc86a93bSAlexander Duyck } 498fc86a93bSAlexander Duyck 49935c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn) 500fc86a93bSAlexander Duyck { 50156ca2adfSAlexander Duyck struct callback_head *head = &tn_info(tn)->rcu; 502fc86a93bSAlexander Duyck 503fc86a93bSAlexander Duyck while (head) { 504fc86a93bSAlexander Duyck head = head->next; 50541b489fdSAlexander Duyck tnode_free_size += TNODE_SIZE(1ul << tn->bits); 50637fd30f2SAlexander Duyck node_free(tn); 507fc86a93bSAlexander Duyck 50856ca2adfSAlexander Duyck tn = container_of(head, struct tnode, rcu)->kv; 509fc86a93bSAlexander Duyck } 510fc86a93bSAlexander Duyck 511fc86a93bSAlexander Duyck if (tnode_free_size >= PAGE_SIZE * sync_pages) { 512fc86a93bSAlexander Duyck tnode_free_size = 0; 513fc86a93bSAlexander Duyck synchronize_rcu(); 514fc86a93bSAlexander Duyck } 5150a5c0475SEric Dumazet } 5160a5c0475SEric Dumazet 51788bae714SAlexander Duyck static struct key_vector *replace(struct trie *t, 51835c6edacSAlexander Duyck struct key_vector *oldtnode, 51935c6edacSAlexander Duyck struct key_vector *tn) 52069fa57b1SAlexander Duyck { 52135c6edacSAlexander Duyck struct key_vector *tp = node_parent(oldtnode); 52269fa57b1SAlexander Duyck unsigned long i; 52369fa57b1SAlexander Duyck 52469fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 52569fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 52688bae714SAlexander Duyck put_child_root(tp, tn->key, tn); 52769fa57b1SAlexander Duyck 52869fa57b1SAlexander Duyck /* update all of the child parent pointers */ 52969fa57b1SAlexander Duyck update_children(tn); 53069fa57b1SAlexander Duyck 53169fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 53269fa57b1SAlexander Duyck tnode_free(oldtnode); 53369fa57b1SAlexander Duyck 53469fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 5352e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 536754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 53769fa57b1SAlexander Duyck 53869fa57b1SAlexander Duyck /* resize child node */ 53969fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 54088bae714SAlexander Duyck tn = resize(t, inode); 54169fa57b1SAlexander Duyck } 5428d8e810cSAlexander Duyck 54388bae714SAlexander Duyck return tp; 54469fa57b1SAlexander Duyck } 54569fa57b1SAlexander Duyck 54688bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t, 54735c6edacSAlexander Duyck struct key_vector *oldtnode) 54819baf839SRobert Olsson { 54935c6edacSAlexander Duyck struct key_vector *tn; 55069fa57b1SAlexander Duyck unsigned long i; 551e9b44019SAlexander Duyck t_key m; 55219baf839SRobert Olsson 5530c7770c7SStephen Hemminger pr_debug("In inflate\n"); 55419baf839SRobert Olsson 555e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5562f80b3c8SRobert Olsson if (!tn) 5578d8e810cSAlexander Duyck goto notnode; 5582f36895aSRobert Olsson 55969fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 56069fa57b1SAlexander Duyck tnode_free_init(oldtnode); 56169fa57b1SAlexander Duyck 56212c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 56312c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 56412c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 56512c081a5SAlexander Duyck * nodes. 5662f36895aSRobert Olsson */ 5672e1ac88aSAlexander Duyck for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { 568754baf8dSAlexander Duyck struct key_vector *inode = get_child(oldtnode, --i); 56935c6edacSAlexander Duyck struct key_vector *node0, *node1; 57069fa57b1SAlexander Duyck unsigned long j, k; 57119baf839SRobert Olsson 57219baf839SRobert Olsson /* An empty child */ 57351456b29SIan Morris if (!inode) 57419baf839SRobert Olsson continue; 57519baf839SRobert Olsson 57619baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 577adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 578e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 57919baf839SRobert Olsson continue; 58019baf839SRobert Olsson } 58119baf839SRobert Olsson 58269fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 58369fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 58469fa57b1SAlexander Duyck 58519baf839SRobert Olsson /* An internal node with two children */ 58619baf839SRobert Olsson if (inode->bits == 1) { 587754baf8dSAlexander Duyck put_child(tn, 2 * i + 1, get_child(inode, 1)); 588754baf8dSAlexander Duyck put_child(tn, 2 * i, get_child(inode, 0)); 58991b9a277SOlof Johansson continue; 59019baf839SRobert Olsson } 59119baf839SRobert Olsson 59219baf839SRobert Olsson /* We will replace this node 'inode' with two new 59312c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 59419baf839SRobert Olsson * original children. The two new nodes will have 59519baf839SRobert Olsson * a position one bit further down the key and this 59619baf839SRobert Olsson * means that the "significant" part of their keys 59719baf839SRobert Olsson * (see the discussion near the top of this file) 59819baf839SRobert Olsson * will differ by one bit, which will be "0" in 59912c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 60019baf839SRobert Olsson * moving the key position by one step, the bit that 60119baf839SRobert Olsson * we are moving away from - the bit at position 60212c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 60312c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 60419baf839SRobert Olsson * two new keys. 60519baf839SRobert Olsson */ 60612c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 60712c081a5SAlexander Duyck if (!node1) 60812c081a5SAlexander Duyck goto nomem; 60969fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 61019baf839SRobert Olsson 61169fa57b1SAlexander Duyck tnode_free_append(tn, node1); 61212c081a5SAlexander Duyck if (!node0) 61312c081a5SAlexander Duyck goto nomem; 61412c081a5SAlexander Duyck tnode_free_append(tn, node0); 6152f36895aSRobert Olsson 61612c081a5SAlexander Duyck /* populate child pointers in new nodes */ 6172e1ac88aSAlexander Duyck for (k = child_length(inode), j = k / 2; j;) { 618754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 619754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 620754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 621754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 62219baf839SRobert Olsson } 623ff181ed8SAlexander Duyck 62412c081a5SAlexander Duyck /* link new nodes to parent */ 62512c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 62612c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 62712c081a5SAlexander Duyck 62812c081a5SAlexander Duyck /* link parent to nodes */ 62912c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 63012c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 63112c081a5SAlexander Duyck } 63212c081a5SAlexander Duyck 63369fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6348d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 635ff181ed8SAlexander Duyck nomem: 636fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 637fc86a93bSAlexander Duyck tnode_free(tn); 6388d8e810cSAlexander Duyck notnode: 6398d8e810cSAlexander Duyck return NULL; 640ff181ed8SAlexander Duyck } 641ff181ed8SAlexander Duyck 64288bae714SAlexander Duyck static struct key_vector *halve(struct trie *t, 64335c6edacSAlexander Duyck struct key_vector *oldtnode) 64419baf839SRobert Olsson { 64535c6edacSAlexander Duyck struct key_vector *tn; 64612c081a5SAlexander Duyck unsigned long i; 64719baf839SRobert Olsson 6480c7770c7SStephen Hemminger pr_debug("In halve\n"); 64919baf839SRobert Olsson 650e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6512f80b3c8SRobert Olsson if (!tn) 6528d8e810cSAlexander Duyck goto notnode; 6532f36895aSRobert Olsson 65469fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 65569fa57b1SAlexander Duyck tnode_free_init(oldtnode); 65669fa57b1SAlexander Duyck 65712c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 65812c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 65912c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 66012c081a5SAlexander Duyck * nodes. 6612f36895aSRobert Olsson */ 6622e1ac88aSAlexander Duyck for (i = child_length(oldtnode); i;) { 663754baf8dSAlexander Duyck struct key_vector *node1 = get_child(oldtnode, --i); 664754baf8dSAlexander Duyck struct key_vector *node0 = get_child(oldtnode, --i); 66535c6edacSAlexander Duyck struct key_vector *inode; 6662f36895aSRobert Olsson 66712c081a5SAlexander Duyck /* At least one of the children is empty */ 66812c081a5SAlexander Duyck if (!node1 || !node0) { 66912c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 67012c081a5SAlexander Duyck continue; 67112c081a5SAlexander Duyck } 6722f36895aSRobert Olsson 6732f36895aSRobert Olsson /* Two nonempty children */ 67412c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 6758d8e810cSAlexander Duyck if (!inode) 6768d8e810cSAlexander Duyck goto nomem; 67712c081a5SAlexander Duyck tnode_free_append(tn, inode); 6782f80b3c8SRobert Olsson 67912c081a5SAlexander Duyck /* initialize pointers out of node */ 68012c081a5SAlexander Duyck put_child(inode, 1, node1); 68112c081a5SAlexander Duyck put_child(inode, 0, node0); 68212c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 68312c081a5SAlexander Duyck 68412c081a5SAlexander Duyck /* link parent to node */ 68512c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6862f36895aSRobert Olsson } 6872f36895aSRobert Olsson 68869fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6898d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 6908d8e810cSAlexander Duyck nomem: 6918d8e810cSAlexander Duyck /* all pointers should be clean so we are done */ 6928d8e810cSAlexander Duyck tnode_free(tn); 6938d8e810cSAlexander Duyck notnode: 6948d8e810cSAlexander Duyck return NULL; 6952f80b3c8SRobert Olsson } 69619baf839SRobert Olsson 69788bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t, 69888bae714SAlexander Duyck struct key_vector *oldtnode) 69995f60ea3SAlexander Duyck { 70035c6edacSAlexander Duyck struct key_vector *n, *tp; 70195f60ea3SAlexander Duyck unsigned long i; 70295f60ea3SAlexander Duyck 70395f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 7042e1ac88aSAlexander Duyck for (n = NULL, i = child_length(oldtnode); !n && i;) 705754baf8dSAlexander Duyck n = get_child(oldtnode, --i); 70695f60ea3SAlexander Duyck 70795f60ea3SAlexander Duyck /* compress one level */ 70895f60ea3SAlexander Duyck tp = node_parent(oldtnode); 70988bae714SAlexander Duyck put_child_root(tp, oldtnode->key, n); 71095f60ea3SAlexander Duyck node_set_parent(n, tp); 71195f60ea3SAlexander Duyck 71295f60ea3SAlexander Duyck /* drop dead node */ 71395f60ea3SAlexander Duyck node_free(oldtnode); 71488bae714SAlexander Duyck 71588bae714SAlexander Duyck return tp; 71695f60ea3SAlexander Duyck } 71795f60ea3SAlexander Duyck 71835c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn) 7195405afd1SAlexander Duyck { 7205405afd1SAlexander Duyck unsigned char slen = tn->pos; 7215405afd1SAlexander Duyck unsigned long stride, i; 722a52ca62cSAlexander Duyck unsigned char slen_max; 723a52ca62cSAlexander Duyck 724a52ca62cSAlexander Duyck /* only vector 0 can have a suffix length greater than or equal to 725a52ca62cSAlexander Duyck * tn->pos + tn->bits, the second highest node will have a suffix 726a52ca62cSAlexander Duyck * length at most of tn->pos + tn->bits - 1 727a52ca62cSAlexander Duyck */ 728a52ca62cSAlexander Duyck slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); 7295405afd1SAlexander Duyck 7305405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 7315405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 7325405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 7335405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 7345405afd1SAlexander Duyck */ 7352e1ac88aSAlexander Duyck for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { 736754baf8dSAlexander Duyck struct key_vector *n = get_child(tn, i); 7375405afd1SAlexander Duyck 7385405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 7395405afd1SAlexander Duyck continue; 7405405afd1SAlexander Duyck 7415405afd1SAlexander Duyck /* update stride and slen based on new value */ 7425405afd1SAlexander Duyck stride <<= (n->slen - slen); 7435405afd1SAlexander Duyck slen = n->slen; 7445405afd1SAlexander Duyck i &= ~(stride - 1); 7455405afd1SAlexander Duyck 746a52ca62cSAlexander Duyck /* stop searching if we have hit the maximum possible value */ 747a52ca62cSAlexander Duyck if (slen >= slen_max) 7485405afd1SAlexander Duyck break; 7495405afd1SAlexander Duyck } 7505405afd1SAlexander Duyck 7515405afd1SAlexander Duyck tn->slen = slen; 7525405afd1SAlexander Duyck 7535405afd1SAlexander Duyck return slen; 7545405afd1SAlexander Duyck } 7555405afd1SAlexander Duyck 756f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 757cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 758cf3637bbSAlexander Duyck * Telecommunications, page 6: 759cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 760cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 761cf3637bbSAlexander Duyck * 762cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 763cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 7642e1ac88aSAlexander Duyck * child_length() and instead of multiplying by 2 (since the 765cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 766cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 767cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 768cf3637bbSAlexander Duyck * 7692e1ac88aSAlexander Duyck * The left-hand side may look a bit weird: child_length(tn) 770cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 771cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 772cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 773cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 774cf3637bbSAlexander Duyck * we just count them one extra time here. 775cf3637bbSAlexander Duyck * 776cf3637bbSAlexander Duyck * A clearer way to write this would be: 777cf3637bbSAlexander Duyck * 778cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 7792e1ac88aSAlexander Duyck * not_to_be_doubled = child_length(tn) - tn->empty_children - 780cf3637bbSAlexander Duyck * tn->full_children; 781cf3637bbSAlexander Duyck * 7822e1ac88aSAlexander Duyck * new_child_length = child_length(tn) * 2; 783cf3637bbSAlexander Duyck * 784cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 785cf3637bbSAlexander Duyck * new_child_length; 786cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 787cf3637bbSAlexander Duyck * 788cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 789cf3637bbSAlexander Duyck * 790cf3637bbSAlexander Duyck * anyway, 791cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 792cf3637bbSAlexander Duyck * inflate_threshold 793cf3637bbSAlexander Duyck * 794cf3637bbSAlexander Duyck * avoid a division: 795cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 796cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 797cf3637bbSAlexander Duyck * 798cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 7992e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 800cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 801cf3637bbSAlexander Duyck * 802cf3637bbSAlexander Duyck * expand new_child_length: 8032e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 804cf3637bbSAlexander Duyck * tn->full_children) >= 8052e1ac88aSAlexander Duyck * inflate_threshold * child_length(tn) * 2 806cf3637bbSAlexander Duyck * 807cf3637bbSAlexander Duyck * shorten again: 8082e1ac88aSAlexander Duyck * 50 * (tn->full_children + child_length(tn) - 809cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 8102e1ac88aSAlexander Duyck * child_length(tn) 811cf3637bbSAlexander Duyck * 812cf3637bbSAlexander Duyck */ 81335c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) 814f05a4819SAlexander Duyck { 8152e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 816f05a4819SAlexander Duyck unsigned long threshold = used; 817cf3637bbSAlexander Duyck 818cf3637bbSAlexander Duyck /* Keep root node larger */ 81988bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; 8206e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 8216e22d174SAlexander Duyck used += tn_info(tn)->full_children; 822cf3637bbSAlexander Duyck 82395f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 82495f60ea3SAlexander Duyck 82595f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 826cf3637bbSAlexander Duyck } 827cf3637bbSAlexander Duyck 82835c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) 829f05a4819SAlexander Duyck { 8302e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 831f05a4819SAlexander Duyck unsigned long threshold = used; 832cf3637bbSAlexander Duyck 833f05a4819SAlexander Duyck /* Keep root node larger */ 83488bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; 8356e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 836f05a4819SAlexander Duyck 83795f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 83895f60ea3SAlexander Duyck 83995f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 84095f60ea3SAlexander Duyck } 84195f60ea3SAlexander Duyck 84235c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn) 84395f60ea3SAlexander Duyck { 8442e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 84595f60ea3SAlexander Duyck 8466e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 84795f60ea3SAlexander Duyck 84895f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 8496e22d174SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) 85095f60ea3SAlexander Duyck used -= KEY_MAX; 85195f60ea3SAlexander Duyck 85295f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 85395f60ea3SAlexander Duyck return used < 2; 854f05a4819SAlexander Duyck } 855f05a4819SAlexander Duyck 856f05a4819SAlexander Duyck #define MAX_WORK 10 85788bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn) 858f05a4819SAlexander Duyck { 8598d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8608d8e810cSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 8618d8e810cSAlexander Duyck #endif 86235c6edacSAlexander Duyck struct key_vector *tp = node_parent(tn); 86388bae714SAlexander Duyck unsigned long cindex = get_index(tn->key, tp); 864a80e89d4SAlexander Duyck int max_work = MAX_WORK; 865f05a4819SAlexander Duyck 866f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 867f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 868f05a4819SAlexander Duyck 869ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 870ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 871ff181ed8SAlexander Duyck * thing without us interfering 872ff181ed8SAlexander Duyck */ 87388bae714SAlexander Duyck BUG_ON(tn != get_child(tp, cindex)); 874ff181ed8SAlexander Duyck 875f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 876f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 877f05a4819SAlexander Duyck */ 878b6f15f82SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 87988bae714SAlexander Duyck tp = inflate(t, tn); 88088bae714SAlexander Duyck if (!tp) { 881cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8828d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 883cf3637bbSAlexander Duyck #endif 884cf3637bbSAlexander Duyck break; 885cf3637bbSAlexander Duyck } 886ff181ed8SAlexander Duyck 887b6f15f82SAlexander Duyck max_work--; 88888bae714SAlexander Duyck tn = get_child(tp, cindex); 889cf3637bbSAlexander Duyck } 890cf3637bbSAlexander Duyck 891b6f15f82SAlexander Duyck /* update parent in case inflate failed */ 892b6f15f82SAlexander Duyck tp = node_parent(tn); 893b6f15f82SAlexander Duyck 894cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 895cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 896b6f15f82SAlexander Duyck return tp; 897cf3637bbSAlexander Duyck 898f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 899cf3637bbSAlexander Duyck * node is above threshold. 900cf3637bbSAlexander Duyck */ 901b6f15f82SAlexander Duyck while (should_halve(tp, tn) && max_work) { 90288bae714SAlexander Duyck tp = halve(t, tn); 90388bae714SAlexander Duyck if (!tp) { 904cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 9058d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 906cf3637bbSAlexander Duyck #endif 907cf3637bbSAlexander Duyck break; 908cf3637bbSAlexander Duyck } 909cf3637bbSAlexander Duyck 910b6f15f82SAlexander Duyck max_work--; 91188bae714SAlexander Duyck tn = get_child(tp, cindex); 912ff181ed8SAlexander Duyck } 913cf3637bbSAlexander Duyck 914cf3637bbSAlexander Duyck /* Only one child remains */ 91588bae714SAlexander Duyck if (should_collapse(tn)) 91688bae714SAlexander Duyck return collapse(t, tn); 91788bae714SAlexander Duyck 918b6f15f82SAlexander Duyck /* update parent in case halve failed */ 919a52ca62cSAlexander Duyck return node_parent(tn); 920cf3637bbSAlexander Duyck } 921cf3637bbSAlexander Duyck 9221a239173SAlexander Duyck static void node_pull_suffix(struct key_vector *tn, unsigned char slen) 92319baf839SRobert Olsson { 9241a239173SAlexander Duyck unsigned char node_slen = tn->slen; 9251a239173SAlexander Duyck 9261a239173SAlexander Duyck while ((node_slen > tn->pos) && (node_slen > slen)) { 9271a239173SAlexander Duyck slen = update_suffix(tn); 9281a239173SAlexander Duyck if (node_slen == slen) 9295405afd1SAlexander Duyck break; 9301a239173SAlexander Duyck 9311a239173SAlexander Duyck tn = node_parent(tn); 9321a239173SAlexander Duyck node_slen = tn->slen; 9335405afd1SAlexander Duyck } 9345405afd1SAlexander Duyck } 9355405afd1SAlexander Duyck 9361a239173SAlexander Duyck static void node_push_suffix(struct key_vector *tn, unsigned char slen) 9375405afd1SAlexander Duyck { 9381a239173SAlexander Duyck while (tn->slen < slen) { 9391a239173SAlexander Duyck tn->slen = slen; 9405405afd1SAlexander Duyck tn = node_parent(tn); 9415405afd1SAlexander Duyck } 9425405afd1SAlexander Duyck } 9435405afd1SAlexander Duyck 9442373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 94535c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t, 94635c6edacSAlexander Duyck struct key_vector **tp, u32 key) 94719baf839SRobert Olsson { 94888bae714SAlexander Duyck struct key_vector *pn, *n = t->kv; 94988bae714SAlexander Duyck unsigned long index = 0; 95019baf839SRobert Olsson 95188bae714SAlexander Duyck do { 95288bae714SAlexander Duyck pn = n; 95388bae714SAlexander Duyck n = get_child_rcu(n, index); 95488bae714SAlexander Duyck 95588bae714SAlexander Duyck if (!n) 95688bae714SAlexander Duyck break; 95788bae714SAlexander Duyck 95888bae714SAlexander Duyck index = get_cindex(key, n); 95919baf839SRobert Olsson 960939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 961939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 962939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 963939afb06SAlexander Duyck * is the difference between the key and this value. From 964939afb06SAlexander Duyck * this we can actually derive several pieces of data. 965d4a975e8SAlexander Duyck * if (index >= (1ul << bits)) 966939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 967b3832117SAlexander Duyck * else 968b3832117SAlexander Duyck * we know the value is cindex 969d4a975e8SAlexander Duyck * 970d4a975e8SAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 971d4a975e8SAlexander Duyck * fact that we can only allocate a node with 32 bits if a 972d4a975e8SAlexander Duyck * long is greater than 32 bits. 973939afb06SAlexander Duyck */ 974d4a975e8SAlexander Duyck if (index >= (1ul << n->bits)) { 975d4a975e8SAlexander Duyck n = NULL; 976d4a975e8SAlexander Duyck break; 977d4a975e8SAlexander Duyck } 978939afb06SAlexander Duyck 97988bae714SAlexander Duyck /* keep searching until we find a perfect match leaf or NULL */ 98088bae714SAlexander Duyck } while (IS_TNODE(n)); 981939afb06SAlexander Duyck 98235c6edacSAlexander Duyck *tp = pn; 983d4a975e8SAlexander Duyck 984939afb06SAlexander Duyck return n; 98519baf839SRobert Olsson } 98619baf839SRobert Olsson 98702525368SAlexander Duyck /* Return the first fib alias matching TOS with 98802525368SAlexander Duyck * priority less than or equal to PRIO. 98902525368SAlexander Duyck */ 99079e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, 9910b65bd97SAlexander Duyck u8 tos, u32 prio, u32 tb_id) 99202525368SAlexander Duyck { 99302525368SAlexander Duyck struct fib_alias *fa; 99402525368SAlexander Duyck 99502525368SAlexander Duyck if (!fah) 99602525368SAlexander Duyck return NULL; 99702525368SAlexander Duyck 99856315f9eSAlexander Duyck hlist_for_each_entry(fa, fah, fa_list) { 99979e5ad2cSAlexander Duyck if (fa->fa_slen < slen) 100079e5ad2cSAlexander Duyck continue; 100179e5ad2cSAlexander Duyck if (fa->fa_slen != slen) 100279e5ad2cSAlexander Duyck break; 10030b65bd97SAlexander Duyck if (fa->tb_id > tb_id) 10040b65bd97SAlexander Duyck continue; 10050b65bd97SAlexander Duyck if (fa->tb_id != tb_id) 10060b65bd97SAlexander Duyck break; 100702525368SAlexander Duyck if (fa->fa_tos > tos) 100802525368SAlexander Duyck continue; 100902525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 101002525368SAlexander Duyck return fa; 101102525368SAlexander Duyck } 101202525368SAlexander Duyck 101302525368SAlexander Duyck return NULL; 101402525368SAlexander Duyck } 101502525368SAlexander Duyck 101635c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn) 101719baf839SRobert Olsson { 101888bae714SAlexander Duyck while (!IS_TRIE(tn)) 101988bae714SAlexander Duyck tn = resize(t, tn); 102019baf839SRobert Olsson } 102119baf839SRobert Olsson 102235c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp, 1023d5d6487cSAlexander Duyck struct fib_alias *new, t_key key) 102419baf839SRobert Olsson { 102535c6edacSAlexander Duyck struct key_vector *n, *l; 1026836a0123SAlexander Duyck 1027d5d6487cSAlexander Duyck l = leaf_new(key, new); 102879e5ad2cSAlexander Duyck if (!l) 10298d8e810cSAlexander Duyck goto noleaf; 1030d5d6487cSAlexander Duyck 1031d5d6487cSAlexander Duyck /* retrieve child from parent node */ 1032754baf8dSAlexander Duyck n = get_child(tp, get_index(key, tp)); 103319baf839SRobert Olsson 1034836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1035836a0123SAlexander Duyck * 103619baf839SRobert Olsson * Add a new tnode here 103719baf839SRobert Olsson * first tnode need some special handling 1038836a0123SAlexander Duyck * leaves us in position for handling as case 3 103919baf839SRobert Olsson */ 104019baf839SRobert Olsson if (n) { 104135c6edacSAlexander Duyck struct key_vector *tn; 1042f835e471SRobert Olsson 1043e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 10448d8e810cSAlexander Duyck if (!tn) 10458d8e810cSAlexander Duyck goto notnode; 104619baf839SRobert Olsson 1047836a0123SAlexander Duyck /* initialize routes out of node */ 1048836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1049836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 105019baf839SRobert Olsson 1051836a0123SAlexander Duyck /* start adding routes into the node */ 105288bae714SAlexander Duyck put_child_root(tp, key, tn); 1053836a0123SAlexander Duyck node_set_parent(n, tn); 105419baf839SRobert Olsson 1055836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1056e962f302SAlexander Duyck tp = tn; 105719baf839SRobert Olsson } 105891b9a277SOlof Johansson 1059836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1060a52ca62cSAlexander Duyck node_push_suffix(tp, new->fa_slen); 1061836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 106288bae714SAlexander Duyck put_child_root(tp, key, l); 10637b85576dSJarek Poplawski trie_rebalance(t, tp); 1064d5d6487cSAlexander Duyck 1065d5d6487cSAlexander Duyck return 0; 10668d8e810cSAlexander Duyck notnode: 10678d8e810cSAlexander Duyck node_free(l); 10688d8e810cSAlexander Duyck noleaf: 10698d8e810cSAlexander Duyck return -ENOMEM; 1070d5d6487cSAlexander Duyck } 1071d5d6487cSAlexander Duyck 107235c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp, 107335c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *new, 1074d5d6487cSAlexander Duyck struct fib_alias *fa, t_key key) 1075d5d6487cSAlexander Duyck { 1076d5d6487cSAlexander Duyck if (!l) 1077d5d6487cSAlexander Duyck return fib_insert_node(t, tp, new, key); 1078d5d6487cSAlexander Duyck 1079d5d6487cSAlexander Duyck if (fa) { 1080d5d6487cSAlexander Duyck hlist_add_before_rcu(&new->fa_list, &fa->fa_list); 1081836a0123SAlexander Duyck } else { 1082d5d6487cSAlexander Duyck struct fib_alias *last; 1083d5d6487cSAlexander Duyck 1084d5d6487cSAlexander Duyck hlist_for_each_entry(last, &l->leaf, fa_list) { 1085d5d6487cSAlexander Duyck if (new->fa_slen < last->fa_slen) 1086d5d6487cSAlexander Duyck break; 10870b65bd97SAlexander Duyck if ((new->fa_slen == last->fa_slen) && 10880b65bd97SAlexander Duyck (new->tb_id > last->tb_id)) 10890b65bd97SAlexander Duyck break; 1090d5d6487cSAlexander Duyck fa = last; 1091836a0123SAlexander Duyck } 1092836a0123SAlexander Duyck 1093d5d6487cSAlexander Duyck if (fa) 1094d5d6487cSAlexander Duyck hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); 1095d5d6487cSAlexander Duyck else 1096d5d6487cSAlexander Duyck hlist_add_head_rcu(&new->fa_list, &l->leaf); 109719baf839SRobert Olsson } 109819baf839SRobert Olsson 1099d5d6487cSAlexander Duyck /* if we added to the tail node then we need to update slen */ 1100d5d6487cSAlexander Duyck if (l->slen < new->fa_slen) { 1101d5d6487cSAlexander Duyck l->slen = new->fa_slen; 11021a239173SAlexander Duyck node_push_suffix(tp, new->fa_slen); 1103d5d6487cSAlexander Duyck } 1104d5d6487cSAlexander Duyck 1105d5d6487cSAlexander Duyck return 0; 1106d5d6487cSAlexander Duyck } 1107d5d6487cSAlexander Duyck 1108d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1109b90eb754SJiri Pirko int fib_table_insert(struct net *net, struct fib_table *tb, 1110b90eb754SJiri Pirko struct fib_config *cfg) 111119baf839SRobert Olsson { 111219baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 111319baf839SRobert Olsson struct fib_alias *fa, *new_fa; 111435c6edacSAlexander Duyck struct key_vector *l, *tp; 1115b93e1fa7SGuillaume Nault u16 nlflags = NLM_F_EXCL; 111619baf839SRobert Olsson struct fib_info *fi; 111779e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 111879e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 11194e902c57SThomas Graf u8 tos = cfg->fc_tos; 1120d4a975e8SAlexander Duyck u32 key; 112119baf839SRobert Olsson int err; 112219baf839SRobert Olsson 11235786ec60SAlexander Duyck if (plen > KEYLENGTH) 112419baf839SRobert Olsson return -EINVAL; 112519baf839SRobert Olsson 11264e902c57SThomas Graf key = ntohl(cfg->fc_dst); 112719baf839SRobert Olsson 11282dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 112919baf839SRobert Olsson 1130d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 113119baf839SRobert Olsson return -EINVAL; 113219baf839SRobert Olsson 11334e902c57SThomas Graf fi = fib_create_info(cfg); 11344e902c57SThomas Graf if (IS_ERR(fi)) { 11354e902c57SThomas Graf err = PTR_ERR(fi); 113619baf839SRobert Olsson goto err; 11374e902c57SThomas Graf } 113819baf839SRobert Olsson 1139d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 11400b65bd97SAlexander Duyck fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, 11410b65bd97SAlexander Duyck tb->tb_id) : NULL; 114219baf839SRobert Olsson 114319baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 114419baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 114519baf839SRobert Olsson * exists or to the node before which we will insert new one. 114619baf839SRobert Olsson * 114719baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 114856315f9eSAlexander Duyck * insert to the tail of the section matching the suffix length 114956315f9eSAlexander Duyck * of the new alias. 115019baf839SRobert Olsson */ 115119baf839SRobert Olsson 1152936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1153936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1154936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 115519baf839SRobert Olsson 115619baf839SRobert Olsson err = -EEXIST; 11574e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 115819baf839SRobert Olsson goto out; 115919baf839SRobert Olsson 1160b93e1fa7SGuillaume Nault nlflags &= ~NLM_F_EXCL; 1161b93e1fa7SGuillaume Nault 1162936f6f8eSJulian Anastasov /* We have 2 goals: 1163936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1164936f6f8eSJulian Anastasov * duplicate routes 1165936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1166936f6f8eSJulian Anastasov */ 1167936f6f8eSJulian Anastasov fa_match = NULL; 1168936f6f8eSJulian Anastasov fa_first = fa; 116956315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 11700b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 11710b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 11720b65bd97SAlexander Duyck (fa->fa_tos != tos)) 1173936f6f8eSJulian Anastasov break; 1174936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1175936f6f8eSJulian Anastasov break; 1176936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1177936f6f8eSJulian Anastasov fa->fa_info == fi) { 1178936f6f8eSJulian Anastasov fa_match = fa; 1179936f6f8eSJulian Anastasov break; 1180936f6f8eSJulian Anastasov } 1181936f6f8eSJulian Anastasov } 1182936f6f8eSJulian Anastasov 11834e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 118419baf839SRobert Olsson struct fib_info *fi_drop; 118519baf839SRobert Olsson u8 state; 118619baf839SRobert Olsson 1187b93e1fa7SGuillaume Nault nlflags |= NLM_F_REPLACE; 1188936f6f8eSJulian Anastasov fa = fa_first; 1189936f6f8eSJulian Anastasov if (fa_match) { 1190936f6f8eSJulian Anastasov if (fa == fa_match) 1191936f6f8eSJulian Anastasov err = 0; 11926725033fSJoonwoo Park goto out; 1193936f6f8eSJulian Anastasov } 11942373ce1cSRobert Olsson err = -ENOBUFS; 1195e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 119651456b29SIan Morris if (!new_fa) 11972373ce1cSRobert Olsson goto out; 119819baf839SRobert Olsson 119919baf839SRobert Olsson fi_drop = fa->fa_info; 12002373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12012373ce1cSRobert Olsson new_fa->fa_info = fi; 12024e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 120319baf839SRobert Olsson state = fa->fa_state; 1204936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 12059b6ebad5SAlexander Duyck new_fa->fa_slen = fa->fa_slen; 1206d4e64c29SMichal Kubeček new_fa->tb_id = tb->tb_id; 12072392debcSJulian Anastasov new_fa->fa_default = -1; 120819baf839SRobert Olsson 120956315f9eSAlexander Duyck hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12108e05fd71SScott Feldman 12112373ce1cSRobert Olsson alias_free_mem_rcu(fa); 121219baf839SRobert Olsson 121319baf839SRobert Olsson fib_release_info(fi_drop); 121419baf839SRobert Olsson if (state & FA_S_ACCESSED) 12154ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1216b90eb754SJiri Pirko 1217b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1218b90eb754SJiri Pirko key, plen, fi, 1219b90eb754SJiri Pirko new_fa->fa_tos, cfg->fc_type, 1220b90eb754SJiri Pirko tb->tb_id, cfg->fc_nlflags); 1221b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1222b93e1fa7SGuillaume Nault tb->tb_id, &cfg->fc_nlinfo, nlflags); 122319baf839SRobert Olsson 122419baf839SRobert Olsson goto succeeded; 122519baf839SRobert Olsson } 122619baf839SRobert Olsson /* Error if we find a perfect match which 122719baf839SRobert Olsson * uses the same scope, type, and nexthop 122819baf839SRobert Olsson * information. 122919baf839SRobert Olsson */ 1230936f6f8eSJulian Anastasov if (fa_match) 123119baf839SRobert Olsson goto out; 1232a07f5f50SStephen Hemminger 1233a2bb6d7dSRoopa Prabhu if (cfg->fc_nlflags & NLM_F_APPEND) 1234b93e1fa7SGuillaume Nault nlflags |= NLM_F_APPEND; 1235a2bb6d7dSRoopa Prabhu else 1236936f6f8eSJulian Anastasov fa = fa_first; 123719baf839SRobert Olsson } 123819baf839SRobert Olsson err = -ENOENT; 12394e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 124019baf839SRobert Olsson goto out; 124119baf839SRobert Olsson 1242b93e1fa7SGuillaume Nault nlflags |= NLM_F_CREATE; 124319baf839SRobert Olsson err = -ENOBUFS; 1244e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 124551456b29SIan Morris if (!new_fa) 124619baf839SRobert Olsson goto out; 124719baf839SRobert Olsson 124819baf839SRobert Olsson new_fa->fa_info = fi; 124919baf839SRobert Olsson new_fa->fa_tos = tos; 12504e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 125119baf839SRobert Olsson new_fa->fa_state = 0; 125279e5ad2cSAlexander Duyck new_fa->fa_slen = slen; 12530ddcf43dSAlexander Duyck new_fa->tb_id = tb->tb_id; 12542392debcSJulian Anastasov new_fa->fa_default = -1; 125519baf839SRobert Olsson 12569b6ebad5SAlexander Duyck /* Insert new entry to the list. */ 1257d5d6487cSAlexander Duyck err = fib_insert_alias(t, tp, l, new_fa, fa, key); 1258d5d6487cSAlexander Duyck if (err) 1259347e3b28SJiri Pirko goto out_free_new_fa; 126019baf839SRobert Olsson 126121d8c49eSDavid S. Miller if (!plen) 126221d8c49eSDavid S. Miller tb->tb_num_default++; 126321d8c49eSDavid S. Miller 12644ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1265b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, 1266b90eb754SJiri Pirko cfg->fc_type, tb->tb_id, cfg->fc_nlflags); 12670ddcf43dSAlexander Duyck rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1268a2bb6d7dSRoopa Prabhu &cfg->fc_nlinfo, nlflags); 126919baf839SRobert Olsson succeeded: 127019baf839SRobert Olsson return 0; 1271f835e471SRobert Olsson 1272f835e471SRobert Olsson out_free_new_fa: 1273f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 127419baf839SRobert Olsson out: 127519baf839SRobert Olsson fib_release_info(fi); 127691b9a277SOlof Johansson err: 127719baf839SRobert Olsson return err; 127819baf839SRobert Olsson } 127919baf839SRobert Olsson 128035c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n) 12819f9e636dSAlexander Duyck { 12829f9e636dSAlexander Duyck t_key prefix = n->key; 12839f9e636dSAlexander Duyck 12849f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 12859f9e636dSAlexander Duyck } 12869f9e636dSAlexander Duyck 1287345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 128822bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1289ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 129019baf839SRobert Olsson { 129119baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 12928274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 12938274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 12948274a97aSAlexander Duyck #endif 12959f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 129635c6edacSAlexander Duyck struct key_vector *n, *pn; 129779e5ad2cSAlexander Duyck struct fib_alias *fa; 129871e8b67dSAlexander Duyck unsigned long index; 12999f9e636dSAlexander Duyck t_key cindex; 130019baf839SRobert Olsson 1301f6d3c192SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp); 1302f6d3c192SDavid Ahern 130388bae714SAlexander Duyck pn = t->kv; 130488bae714SAlexander Duyck cindex = 0; 130588bae714SAlexander Duyck 130688bae714SAlexander Duyck n = get_child_rcu(pn, cindex); 130719baf839SRobert Olsson if (!n) 1308345e9b54SAlexander Duyck return -EAGAIN; 130919baf839SRobert Olsson 131019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13118274a97aSAlexander Duyck this_cpu_inc(stats->gets); 131219baf839SRobert Olsson #endif 131319baf839SRobert Olsson 13149f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 13159f9e636dSAlexander Duyck for (;;) { 131688bae714SAlexander Duyck index = get_cindex(key, n); 13179f9e636dSAlexander Duyck 13189f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13199f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13209f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13219f9e636dSAlexander Duyck * is the difference between the key and this value. From 13229f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 132371e8b67dSAlexander Duyck * if (index >= (1ul << bits)) 13249f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1325b3832117SAlexander Duyck * else 1326b3832117SAlexander Duyck * we know the value is cindex 132771e8b67dSAlexander Duyck * 132871e8b67dSAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 132971e8b67dSAlexander Duyck * fact that we can only allocate a node with 32 bits if a 133071e8b67dSAlexander Duyck * long is greater than 32 bits. 13319f9e636dSAlexander Duyck */ 133271e8b67dSAlexander Duyck if (index >= (1ul << n->bits)) 13339f9e636dSAlexander Duyck break; 13349f9e636dSAlexander Duyck 13359f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13369f9e636dSAlexander Duyck if (IS_LEAF(n)) 1337a07f5f50SStephen Hemminger goto found; 13389f9e636dSAlexander Duyck 13399f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13409f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13419f9e636dSAlexander Duyck */ 13425405afd1SAlexander Duyck if (n->slen > n->pos) { 13439f9e636dSAlexander Duyck pn = n; 13449f9e636dSAlexander Duyck cindex = index; 134519baf839SRobert Olsson } 1346a07f5f50SStephen Hemminger 1347754baf8dSAlexander Duyck n = get_child_rcu(n, index); 13489f9e636dSAlexander Duyck if (unlikely(!n)) 13499f9e636dSAlexander Duyck goto backtrace; 13509f9e636dSAlexander Duyck } 135119baf839SRobert Olsson 13529f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13539f9e636dSAlexander Duyck for (;;) { 13549f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 135535c6edacSAlexander Duyck struct key_vector __rcu **cptr = n->tnode; 135619baf839SRobert Olsson 13579f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13589f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13599f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13609f9e636dSAlexander Duyck */ 13615405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13629f9e636dSAlexander Duyck goto backtrace; 136319baf839SRobert Olsson 13649f9e636dSAlexander Duyck /* exit out and process leaf */ 13659f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13669f9e636dSAlexander Duyck break; 136719baf839SRobert Olsson 13689f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13699f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 13709f9e636dSAlexander Duyck * we started this traversal anyway 13719f9e636dSAlexander Duyck */ 13729f9e636dSAlexander Duyck 13739f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 13749f9e636dSAlexander Duyck backtrace: 137519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13769f9e636dSAlexander Duyck if (!n) 13778274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 137819baf839SRobert Olsson #endif 13799f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 13809f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 13819f9e636dSAlexander Duyck * up one level to see if there are any more bits to 13829f9e636dSAlexander Duyck * be stripped there. 138319baf839SRobert Olsson */ 13849f9e636dSAlexander Duyck while (!cindex) { 13859f9e636dSAlexander Duyck t_key pkey = pn->key; 138619baf839SRobert Olsson 138788bae714SAlexander Duyck /* If we don't have a parent then there is 138888bae714SAlexander Duyck * nothing for us to do as we do not have any 138988bae714SAlexander Duyck * further nodes to parse. 139088bae714SAlexander Duyck */ 139188bae714SAlexander Duyck if (IS_TRIE(pn)) 1392345e9b54SAlexander Duyck return -EAGAIN; 139319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13948274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 139519baf839SRobert Olsson #endif 13969f9e636dSAlexander Duyck /* Get Child's index */ 139788bae714SAlexander Duyck pn = node_parent_rcu(pn); 13989f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 13999f9e636dSAlexander Duyck } 14009f9e636dSAlexander Duyck 14019f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 14029f9e636dSAlexander Duyck cindex &= cindex - 1; 14039f9e636dSAlexander Duyck 14049f9e636dSAlexander Duyck /* grab pointer for next child node */ 140541b489fdSAlexander Duyck cptr = &pn->tnode[cindex]; 140619baf839SRobert Olsson } 140719baf839SRobert Olsson } 14089f9e636dSAlexander Duyck 140919baf839SRobert Olsson found: 141071e8b67dSAlexander Duyck /* this line carries forward the xor from earlier in the function */ 141171e8b67dSAlexander Duyck index = key ^ n->key; 141271e8b67dSAlexander Duyck 14139f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 141479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 1415345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1416345e9b54SAlexander Duyck int nhsel, err; 1417345e9b54SAlexander Duyck 1418a5829f53SAlexander Duyck if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { 1419a5829f53SAlexander Duyck if (index >= (1ul << fa->fa_slen)) 14209b6ebad5SAlexander Duyck continue; 1421a5829f53SAlexander Duyck } 1422345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1423345e9b54SAlexander Duyck continue; 1424345e9b54SAlexander Duyck if (fi->fib_dead) 1425345e9b54SAlexander Duyck continue; 1426345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1427345e9b54SAlexander Duyck continue; 1428345e9b54SAlexander Duyck fib_alias_accessed(fa); 1429345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1430345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1431345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1432345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1433345e9b54SAlexander Duyck #endif 1434345e9b54SAlexander Duyck return err; 1435345e9b54SAlexander Duyck } 1436345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1437345e9b54SAlexander Duyck continue; 1438345e9b54SAlexander Duyck for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1439345e9b54SAlexander Duyck const struct fib_nh *nh = &fi->fib_nh[nhsel]; 14400eeb075fSAndy Gospodarek struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); 1441345e9b54SAlexander Duyck 1442345e9b54SAlexander Duyck if (nh->nh_flags & RTNH_F_DEAD) 1443345e9b54SAlexander Duyck continue; 14440eeb075fSAndy Gospodarek if (in_dev && 14450eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 14460eeb075fSAndy Gospodarek nh->nh_flags & RTNH_F_LINKDOWN && 14470eeb075fSAndy Gospodarek !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 14480eeb075fSAndy Gospodarek continue; 144958189ca7SDavid Ahern if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { 1450613d09b3SDavid Ahern if (flp->flowi4_oif && 1451613d09b3SDavid Ahern flp->flowi4_oif != nh->nh_oif) 1452345e9b54SAlexander Duyck continue; 1453613d09b3SDavid Ahern } 1454345e9b54SAlexander Duyck 1455345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 1456345e9b54SAlexander Duyck atomic_inc(&fi->fib_clntref); 1457345e9b54SAlexander Duyck 14589b6ebad5SAlexander Duyck res->prefixlen = KEYLENGTH - fa->fa_slen; 1459345e9b54SAlexander Duyck res->nh_sel = nhsel; 1460345e9b54SAlexander Duyck res->type = fa->fa_type; 1461345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1462345e9b54SAlexander Duyck res->fi = fi; 1463345e9b54SAlexander Duyck res->table = tb; 146479e5ad2cSAlexander Duyck res->fa_head = &n->leaf; 1465345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1466345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1467345e9b54SAlexander Duyck #endif 1468f6d3c192SDavid Ahern trace_fib_table_lookup_nh(nh); 1469f6d3c192SDavid Ahern 1470345e9b54SAlexander Duyck return err; 1471345e9b54SAlexander Duyck } 1472345e9b54SAlexander Duyck } 1473345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1474345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1475345e9b54SAlexander Duyck #endif 14769f9e636dSAlexander Duyck goto backtrace; 147719baf839SRobert Olsson } 14786fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 147919baf839SRobert Olsson 148035c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp, 148135c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *old) 1482d5d6487cSAlexander Duyck { 1483d5d6487cSAlexander Duyck /* record the location of the previous list_info entry */ 1484d5d6487cSAlexander Duyck struct hlist_node **pprev = old->fa_list.pprev; 1485d5d6487cSAlexander Duyck struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); 1486d5d6487cSAlexander Duyck 1487d5d6487cSAlexander Duyck /* remove the fib_alias from the list */ 1488d5d6487cSAlexander Duyck hlist_del_rcu(&old->fa_list); 1489d5d6487cSAlexander Duyck 1490d5d6487cSAlexander Duyck /* if we emptied the list this leaf will be freed and we can sort 1491d5d6487cSAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 1492d562f1f8SRobert Olsson */ 1493d5d6487cSAlexander Duyck if (hlist_empty(&l->leaf)) { 1494a52ca62cSAlexander Duyck if (tp->slen == l->slen) 1495a52ca62cSAlexander Duyck node_pull_suffix(tp, tp->pos); 149688bae714SAlexander Duyck put_child_root(tp, l->key, NULL); 1497d5d6487cSAlexander Duyck node_free(l); 1498d5d6487cSAlexander Duyck trie_rebalance(t, tp); 1499d5d6487cSAlexander Duyck return; 1500d5d6487cSAlexander Duyck } 1501d5d6487cSAlexander Duyck 1502d5d6487cSAlexander Duyck /* only access fa if it is pointing at the last valid hlist_node */ 1503d5d6487cSAlexander Duyck if (*pprev) 1504d5d6487cSAlexander Duyck return; 1505d5d6487cSAlexander Duyck 1506d5d6487cSAlexander Duyck /* update the trie with the latest suffix length */ 1507d5d6487cSAlexander Duyck l->slen = fa->fa_slen; 15081a239173SAlexander Duyck node_pull_suffix(tp, fa->fa_slen); 1509d5d6487cSAlexander Duyck } 1510d5d6487cSAlexander Duyck 1511d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1512b90eb754SJiri Pirko int fib_table_delete(struct net *net, struct fib_table *tb, 1513b90eb754SJiri Pirko struct fib_config *cfg) 151419baf839SRobert Olsson { 151519baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 151619baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 151735c6edacSAlexander Duyck struct key_vector *l, *tp; 151879e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 151979e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 1520d4a975e8SAlexander Duyck u8 tos = cfg->fc_tos; 1521d4a975e8SAlexander Duyck u32 key; 152291b9a277SOlof Johansson 152379e5ad2cSAlexander Duyck if (plen > KEYLENGTH) 152419baf839SRobert Olsson return -EINVAL; 152519baf839SRobert Olsson 15264e902c57SThomas Graf key = ntohl(cfg->fc_dst); 152719baf839SRobert Olsson 1528d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 152919baf839SRobert Olsson return -EINVAL; 153019baf839SRobert Olsson 1531d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 153219baf839SRobert Olsson if (!l) 153319baf839SRobert Olsson return -ESRCH; 153419baf839SRobert Olsson 15350b65bd97SAlexander Duyck fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); 153619baf839SRobert Olsson if (!fa) 153719baf839SRobert Olsson return -ESRCH; 153819baf839SRobert Olsson 15390c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 154019baf839SRobert Olsson 154119baf839SRobert Olsson fa_to_delete = NULL; 154256315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 154319baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 154419baf839SRobert Olsson 15450b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 15460b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 15470b65bd97SAlexander Duyck (fa->fa_tos != tos)) 154819baf839SRobert Olsson break; 154919baf839SRobert Olsson 15504e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15514e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 155237e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 155374cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 155474cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15554e902c57SThomas Graf (!cfg->fc_protocol || 15564e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15574e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 155819baf839SRobert Olsson fa_to_delete = fa; 155919baf839SRobert Olsson break; 156019baf839SRobert Olsson } 156119baf839SRobert Olsson } 156219baf839SRobert Olsson 156391b9a277SOlof Johansson if (!fa_to_delete) 156491b9a277SOlof Johansson return -ESRCH; 156519baf839SRobert Olsson 1566b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, 1567b90eb754SJiri Pirko fa_to_delete->fa_info, tos, cfg->fc_type, 1568b90eb754SJiri Pirko tb->tb_id, 0); 1569d5d6487cSAlexander Duyck rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1570b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 157119baf839SRobert Olsson 157221d8c49eSDavid S. Miller if (!plen) 157321d8c49eSDavid S. Miller tb->tb_num_default--; 157421d8c49eSDavid S. Miller 1575d5d6487cSAlexander Duyck fib_remove_alias(t, tp, l, fa_to_delete); 15767289e6ddSAlexander Duyck 1577d5d6487cSAlexander Duyck if (fa_to_delete->fa_state & FA_S_ACCESSED) 15784ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 157919baf839SRobert Olsson 1580d5d6487cSAlexander Duyck fib_release_info(fa_to_delete->fa_info); 1581d5d6487cSAlexander Duyck alias_free_mem_rcu(fa_to_delete); 158219baf839SRobert Olsson return 0; 158319baf839SRobert Olsson } 158419baf839SRobert Olsson 15858be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */ 158635c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) 158719baf839SRobert Olsson { 158835c6edacSAlexander Duyck struct key_vector *pn, *n = *tn; 15898be33e95SAlexander Duyck unsigned long cindex; 159019baf839SRobert Olsson 15918be33e95SAlexander Duyck /* this loop is meant to try and find the key in the trie */ 159288bae714SAlexander Duyck do { 159388bae714SAlexander Duyck /* record parent and next child index */ 159488bae714SAlexander Duyck pn = n; 1595c2229fe1SAlexander Duyck cindex = (key > pn->key) ? get_index(key, pn) : 0; 159688bae714SAlexander Duyck 159788bae714SAlexander Duyck if (cindex >> pn->bits) 159888bae714SAlexander Duyck break; 159988bae714SAlexander Duyck 160088bae714SAlexander Duyck /* descend into the next child */ 160188bae714SAlexander Duyck n = get_child_rcu(pn, cindex++); 160288bae714SAlexander Duyck if (!n) 160388bae714SAlexander Duyck break; 16048be33e95SAlexander Duyck 16058be33e95SAlexander Duyck /* guarantee forward progress on the keys */ 16068be33e95SAlexander Duyck if (IS_LEAF(n) && (n->key >= key)) 16078be33e95SAlexander Duyck goto found; 160888bae714SAlexander Duyck } while (IS_TNODE(n)); 16098be33e95SAlexander Duyck 16108be33e95SAlexander Duyck /* this loop will search for the next leaf with a greater key */ 161188bae714SAlexander Duyck while (!IS_TRIE(pn)) { 16128be33e95SAlexander Duyck /* if we exhausted the parent node we will need to climb */ 16138be33e95SAlexander Duyck if (cindex >= (1ul << pn->bits)) { 16148be33e95SAlexander Duyck t_key pkey = pn->key; 16158be33e95SAlexander Duyck 16168be33e95SAlexander Duyck pn = node_parent_rcu(pn); 16178be33e95SAlexander Duyck cindex = get_index(pkey, pn) + 1; 16188be33e95SAlexander Duyck continue; 16198be33e95SAlexander Duyck } 16208be33e95SAlexander Duyck 16218be33e95SAlexander Duyck /* grab the next available node */ 1622754baf8dSAlexander Duyck n = get_child_rcu(pn, cindex++); 16238be33e95SAlexander Duyck if (!n) 162491b9a277SOlof Johansson continue; 162519baf839SRobert Olsson 16268be33e95SAlexander Duyck /* no need to compare keys since we bumped the index */ 16278be33e95SAlexander Duyck if (IS_LEAF(n)) 16288be33e95SAlexander Duyck goto found; 162982cfbb00SStephen Hemminger 163082cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 16318be33e95SAlexander Duyck pn = n; 16328be33e95SAlexander Duyck cindex = 0; 163319baf839SRobert Olsson } 163482cfbb00SStephen Hemminger 16358be33e95SAlexander Duyck *tn = pn; 163682cfbb00SStephen Hemminger return NULL; /* Root of trie */ 16378be33e95SAlexander Duyck found: 16388be33e95SAlexander Duyck /* if we are at the limit for keys just return NULL for the tnode */ 163988bae714SAlexander Duyck *tn = pn; 1640adaf9816SAlexander Duyck return n; 164182cfbb00SStephen Hemminger } 164282cfbb00SStephen Hemminger 16430ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb) 16440ddcf43dSAlexander Duyck { 16450ddcf43dSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 16460ddcf43dSAlexander Duyck struct key_vector *pn = t->kv; 16470ddcf43dSAlexander Duyck unsigned long cindex = 1; 16480ddcf43dSAlexander Duyck struct hlist_node *tmp; 16490ddcf43dSAlexander Duyck struct fib_alias *fa; 16500ddcf43dSAlexander Duyck 16510ddcf43dSAlexander Duyck /* walk trie in reverse order and free everything */ 16520ddcf43dSAlexander Duyck for (;;) { 16530ddcf43dSAlexander Duyck struct key_vector *n; 16540ddcf43dSAlexander Duyck 16550ddcf43dSAlexander Duyck if (!(cindex--)) { 16560ddcf43dSAlexander Duyck t_key pkey = pn->key; 16570ddcf43dSAlexander Duyck 16580ddcf43dSAlexander Duyck if (IS_TRIE(pn)) 16590ddcf43dSAlexander Duyck break; 16600ddcf43dSAlexander Duyck 16610ddcf43dSAlexander Duyck n = pn; 16620ddcf43dSAlexander Duyck pn = node_parent(pn); 16630ddcf43dSAlexander Duyck 16640ddcf43dSAlexander Duyck /* drop emptied tnode */ 16650ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16660ddcf43dSAlexander Duyck node_free(n); 16670ddcf43dSAlexander Duyck 16680ddcf43dSAlexander Duyck cindex = get_index(pkey, pn); 16690ddcf43dSAlexander Duyck 16700ddcf43dSAlexander Duyck continue; 16710ddcf43dSAlexander Duyck } 16720ddcf43dSAlexander Duyck 16730ddcf43dSAlexander Duyck /* grab the next available node */ 16740ddcf43dSAlexander Duyck n = get_child(pn, cindex); 16750ddcf43dSAlexander Duyck if (!n) 16760ddcf43dSAlexander Duyck continue; 16770ddcf43dSAlexander Duyck 16780ddcf43dSAlexander Duyck if (IS_TNODE(n)) { 16790ddcf43dSAlexander Duyck /* record pn and cindex for leaf walking */ 16800ddcf43dSAlexander Duyck pn = n; 16810ddcf43dSAlexander Duyck cindex = 1ul << n->bits; 16820ddcf43dSAlexander Duyck 16830ddcf43dSAlexander Duyck continue; 16840ddcf43dSAlexander Duyck } 16850ddcf43dSAlexander Duyck 16860ddcf43dSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 16870ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 16880ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 16890ddcf43dSAlexander Duyck } 16900ddcf43dSAlexander Duyck 16910ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16920ddcf43dSAlexander Duyck node_free(n); 16930ddcf43dSAlexander Duyck } 16940ddcf43dSAlexander Duyck 16950ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 16960ddcf43dSAlexander Duyck free_percpu(t->stats); 16970ddcf43dSAlexander Duyck #endif 16980ddcf43dSAlexander Duyck kfree(tb); 16990ddcf43dSAlexander Duyck } 17000ddcf43dSAlexander Duyck 17010ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) 17020ddcf43dSAlexander Duyck { 17030ddcf43dSAlexander Duyck struct trie *ot = (struct trie *)oldtb->tb_data; 17040ddcf43dSAlexander Duyck struct key_vector *l, *tp = ot->kv; 17050ddcf43dSAlexander Duyck struct fib_table *local_tb; 17060ddcf43dSAlexander Duyck struct fib_alias *fa; 17070ddcf43dSAlexander Duyck struct trie *lt; 17080ddcf43dSAlexander Duyck t_key key = 0; 17090ddcf43dSAlexander Duyck 17100ddcf43dSAlexander Duyck if (oldtb->tb_data == oldtb->__data) 17110ddcf43dSAlexander Duyck return oldtb; 17120ddcf43dSAlexander Duyck 17130ddcf43dSAlexander Duyck local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); 17140ddcf43dSAlexander Duyck if (!local_tb) 17150ddcf43dSAlexander Duyck return NULL; 17160ddcf43dSAlexander Duyck 17170ddcf43dSAlexander Duyck lt = (struct trie *)local_tb->tb_data; 17180ddcf43dSAlexander Duyck 17190ddcf43dSAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 17200ddcf43dSAlexander Duyck struct key_vector *local_l = NULL, *local_tp; 17210ddcf43dSAlexander Duyck 17220ddcf43dSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 17230ddcf43dSAlexander Duyck struct fib_alias *new_fa; 17240ddcf43dSAlexander Duyck 17250ddcf43dSAlexander Duyck if (local_tb->tb_id != fa->tb_id) 17260ddcf43dSAlexander Duyck continue; 17270ddcf43dSAlexander Duyck 17280ddcf43dSAlexander Duyck /* clone fa for new local table */ 17290ddcf43dSAlexander Duyck new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 17300ddcf43dSAlexander Duyck if (!new_fa) 17310ddcf43dSAlexander Duyck goto out; 17320ddcf43dSAlexander Duyck 17330ddcf43dSAlexander Duyck memcpy(new_fa, fa, sizeof(*fa)); 17340ddcf43dSAlexander Duyck 17350ddcf43dSAlexander Duyck /* insert clone into table */ 17360ddcf43dSAlexander Duyck if (!local_l) 17370ddcf43dSAlexander Duyck local_l = fib_find_node(lt, &local_tp, l->key); 17380ddcf43dSAlexander Duyck 17390ddcf43dSAlexander Duyck if (fib_insert_alias(lt, local_tp, local_l, new_fa, 17403114cdfeSAlexander Duyck NULL, l->key)) { 17413114cdfeSAlexander Duyck kmem_cache_free(fn_alias_kmem, new_fa); 17420ddcf43dSAlexander Duyck goto out; 17430ddcf43dSAlexander Duyck } 17443114cdfeSAlexander Duyck } 17450ddcf43dSAlexander Duyck 17460ddcf43dSAlexander Duyck /* stop loop if key wrapped back to 0 */ 17470ddcf43dSAlexander Duyck key = l->key + 1; 17480ddcf43dSAlexander Duyck if (key < l->key) 17490ddcf43dSAlexander Duyck break; 17500ddcf43dSAlexander Duyck } 17510ddcf43dSAlexander Duyck 17520ddcf43dSAlexander Duyck return local_tb; 17530ddcf43dSAlexander Duyck out: 17540ddcf43dSAlexander Duyck fib_trie_free(local_tb); 17550ddcf43dSAlexander Duyck 17560ddcf43dSAlexander Duyck return NULL; 17570ddcf43dSAlexander Duyck } 17580ddcf43dSAlexander Duyck 17593b709334SAlexander Duyck /* Caller must hold RTNL */ 17603b709334SAlexander Duyck void fib_table_flush_external(struct fib_table *tb) 17613b709334SAlexander Duyck { 17623b709334SAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 17633b709334SAlexander Duyck struct key_vector *pn = t->kv; 17643b709334SAlexander Duyck unsigned long cindex = 1; 17653b709334SAlexander Duyck struct hlist_node *tmp; 17663b709334SAlexander Duyck struct fib_alias *fa; 17673b709334SAlexander Duyck 17683b709334SAlexander Duyck /* walk trie in reverse order */ 17693b709334SAlexander Duyck for (;;) { 17703b709334SAlexander Duyck unsigned char slen = 0; 17713b709334SAlexander Duyck struct key_vector *n; 17723b709334SAlexander Duyck 17733b709334SAlexander Duyck if (!(cindex--)) { 17743b709334SAlexander Duyck t_key pkey = pn->key; 17753b709334SAlexander Duyck 17763b709334SAlexander Duyck /* cannot resize the trie vector */ 17773b709334SAlexander Duyck if (IS_TRIE(pn)) 17783b709334SAlexander Duyck break; 17793b709334SAlexander Duyck 1780a52ca62cSAlexander Duyck /* update the suffix to address pulled leaves */ 1781a52ca62cSAlexander Duyck if (pn->slen > pn->pos) 1782a52ca62cSAlexander Duyck update_suffix(pn); 1783a52ca62cSAlexander Duyck 17843b709334SAlexander Duyck /* resize completed node */ 17853b709334SAlexander Duyck pn = resize(t, pn); 17863b709334SAlexander Duyck cindex = get_index(pkey, pn); 17873b709334SAlexander Duyck 17883b709334SAlexander Duyck continue; 17893b709334SAlexander Duyck } 17903b709334SAlexander Duyck 17913b709334SAlexander Duyck /* grab the next available node */ 17923b709334SAlexander Duyck n = get_child(pn, cindex); 17933b709334SAlexander Duyck if (!n) 17943b709334SAlexander Duyck continue; 17953b709334SAlexander Duyck 17963b709334SAlexander Duyck if (IS_TNODE(n)) { 17973b709334SAlexander Duyck /* record pn and cindex for leaf walking */ 17983b709334SAlexander Duyck pn = n; 17993b709334SAlexander Duyck cindex = 1ul << n->bits; 18003b709334SAlexander Duyck 18013b709334SAlexander Duyck continue; 18023b709334SAlexander Duyck } 18033b709334SAlexander Duyck 18043b709334SAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 18053b709334SAlexander Duyck /* if alias was cloned to local then we just 18063b709334SAlexander Duyck * need to remove the local copy from main 18073b709334SAlexander Duyck */ 18083b709334SAlexander Duyck if (tb->tb_id != fa->tb_id) { 18093b709334SAlexander Duyck hlist_del_rcu(&fa->fa_list); 18103b709334SAlexander Duyck alias_free_mem_rcu(fa); 18113b709334SAlexander Duyck continue; 18123b709334SAlexander Duyck } 18133b709334SAlexander Duyck 18143b709334SAlexander Duyck /* record local slen */ 18153b709334SAlexander Duyck slen = fa->fa_slen; 18163b709334SAlexander Duyck } 18173b709334SAlexander Duyck 18183b709334SAlexander Duyck /* update leaf slen */ 18193b709334SAlexander Duyck n->slen = slen; 18203b709334SAlexander Duyck 18213b709334SAlexander Duyck if (hlist_empty(&n->leaf)) { 18223b709334SAlexander Duyck put_child_root(pn, n->key, NULL); 18233b709334SAlexander Duyck node_free(n); 18243b709334SAlexander Duyck } 18253b709334SAlexander Duyck } 18263b709334SAlexander Duyck } 18273b709334SAlexander Duyck 18288be33e95SAlexander Duyck /* Caller must hold RTNL. */ 1829b90eb754SJiri Pirko int fib_table_flush(struct net *net, struct fib_table *tb) 183019baf839SRobert Olsson { 183119baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 183288bae714SAlexander Duyck struct key_vector *pn = t->kv; 183388bae714SAlexander Duyck unsigned long cindex = 1; 18347289e6ddSAlexander Duyck struct hlist_node *tmp; 18357289e6ddSAlexander Duyck struct fib_alias *fa; 183682cfbb00SStephen Hemminger int found = 0; 183719baf839SRobert Olsson 18387289e6ddSAlexander Duyck /* walk trie in reverse order */ 183988bae714SAlexander Duyck for (;;) { 184088bae714SAlexander Duyck unsigned char slen = 0; 184188bae714SAlexander Duyck struct key_vector *n; 184288bae714SAlexander Duyck 184388bae714SAlexander Duyck if (!(cindex--)) { 18447289e6ddSAlexander Duyck t_key pkey = pn->key; 18457289e6ddSAlexander Duyck 184688bae714SAlexander Duyck /* cannot resize the trie vector */ 184788bae714SAlexander Duyck if (IS_TRIE(pn)) 184888bae714SAlexander Duyck break; 18497289e6ddSAlexander Duyck 1850a52ca62cSAlexander Duyck /* update the suffix to address pulled leaves */ 1851a52ca62cSAlexander Duyck if (pn->slen > pn->pos) 1852a52ca62cSAlexander Duyck update_suffix(pn); 1853a52ca62cSAlexander Duyck 18547289e6ddSAlexander Duyck /* resize completed node */ 185588bae714SAlexander Duyck pn = resize(t, pn); 18567289e6ddSAlexander Duyck cindex = get_index(pkey, pn); 185788bae714SAlexander Duyck 185888bae714SAlexander Duyck continue; 185964c62723SAlexander Duyck } 186064c62723SAlexander Duyck 18617289e6ddSAlexander Duyck /* grab the next available node */ 1862754baf8dSAlexander Duyck n = get_child(pn, cindex); 186388bae714SAlexander Duyck if (!n) 186488bae714SAlexander Duyck continue; 186519baf839SRobert Olsson 186688bae714SAlexander Duyck if (IS_TNODE(n)) { 186788bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 186888bae714SAlexander Duyck pn = n; 186988bae714SAlexander Duyck cindex = 1ul << n->bits; 187088bae714SAlexander Duyck 187188bae714SAlexander Duyck continue; 187288bae714SAlexander Duyck } 18737289e6ddSAlexander Duyck 18747289e6ddSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 18757289e6ddSAlexander Duyck struct fib_info *fi = fa->fa_info; 18767289e6ddSAlexander Duyck 187788bae714SAlexander Duyck if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { 187888bae714SAlexander Duyck slen = fa->fa_slen; 187988bae714SAlexander Duyck continue; 188088bae714SAlexander Duyck } 188188bae714SAlexander Duyck 1882b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1883b90eb754SJiri Pirko n->key, 1884b90eb754SJiri Pirko KEYLENGTH - fa->fa_slen, 1885b90eb754SJiri Pirko fi, fa->fa_tos, fa->fa_type, 1886b90eb754SJiri Pirko tb->tb_id, 0); 18877289e6ddSAlexander Duyck hlist_del_rcu(&fa->fa_list); 18887289e6ddSAlexander Duyck fib_release_info(fa->fa_info); 18897289e6ddSAlexander Duyck alias_free_mem_rcu(fa); 18907289e6ddSAlexander Duyck found++; 18917289e6ddSAlexander Duyck } 18927289e6ddSAlexander Duyck 18937289e6ddSAlexander Duyck /* update leaf slen */ 18947289e6ddSAlexander Duyck n->slen = slen; 18957289e6ddSAlexander Duyck 18967289e6ddSAlexander Duyck if (hlist_empty(&n->leaf)) { 189788bae714SAlexander Duyck put_child_root(pn, n->key, NULL); 18987289e6ddSAlexander Duyck node_free(n); 18997289e6ddSAlexander Duyck } 190088bae714SAlexander Duyck } 19017289e6ddSAlexander Duyck 19020c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 190319baf839SRobert Olsson return found; 190419baf839SRobert Olsson } 190519baf839SRobert Olsson 1906a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head) 19074aa2c466SPavel Emelyanov { 1908a7e53531SAlexander Duyck struct fib_table *tb = container_of(head, struct fib_table, rcu); 19098274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 19108274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 19118274a97aSAlexander Duyck 19120ddcf43dSAlexander Duyck if (tb->tb_data == tb->__data) 19138274a97aSAlexander Duyck free_percpu(t->stats); 19148274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 19154aa2c466SPavel Emelyanov kfree(tb); 19164aa2c466SPavel Emelyanov } 19174aa2c466SPavel Emelyanov 1918a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb) 1919a7e53531SAlexander Duyck { 1920a7e53531SAlexander Duyck call_rcu(&tb->rcu, __trie_free_rcu); 1921a7e53531SAlexander Duyck } 1922a7e53531SAlexander Duyck 192335c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 192419baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 192519baf839SRobert Olsson { 192679e5ad2cSAlexander Duyck __be32 xkey = htonl(l->key); 192719baf839SRobert Olsson struct fib_alias *fa; 192879e5ad2cSAlexander Duyck int i, s_i; 192919baf839SRobert Olsson 193079e5ad2cSAlexander Duyck s_i = cb->args[4]; 193119baf839SRobert Olsson i = 0; 193219baf839SRobert Olsson 19332373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 193479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 193519baf839SRobert Olsson if (i < s_i) { 193619baf839SRobert Olsson i++; 193719baf839SRobert Olsson continue; 193819baf839SRobert Olsson } 193919baf839SRobert Olsson 19400ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 19410ddcf43dSAlexander Duyck i++; 19420ddcf43dSAlexander Duyck continue; 19430ddcf43dSAlexander Duyck } 19440ddcf43dSAlexander Duyck 194515e47304SEric W. Biederman if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 194619baf839SRobert Olsson cb->nlh->nlmsg_seq, 194719baf839SRobert Olsson RTM_NEWROUTE, 194819baf839SRobert Olsson tb->tb_id, 194919baf839SRobert Olsson fa->fa_type, 1950be403ea1SThomas Graf xkey, 19519b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 195219baf839SRobert Olsson fa->fa_tos, 195364347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 195471d67e66SStephen Hemminger cb->args[4] = i; 195519baf839SRobert Olsson return -1; 195619baf839SRobert Olsson } 1957a88ee229SStephen Hemminger i++; 195819baf839SRobert Olsson } 1959a88ee229SStephen Hemminger 196071d67e66SStephen Hemminger cb->args[4] = i; 196119baf839SRobert Olsson return skb->len; 196219baf839SRobert Olsson } 196319baf839SRobert Olsson 1964a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */ 196516c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1966a07f5f50SStephen Hemminger struct netlink_callback *cb) 196719baf839SRobert Olsson { 196819baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 196988bae714SAlexander Duyck struct key_vector *l, *tp = t->kv; 1970d5ce8a0eSStephen Hemminger /* Dump starting at last key. 1971d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 1972d5ce8a0eSStephen Hemminger */ 19738be33e95SAlexander Duyck int count = cb->args[2]; 19748be33e95SAlexander Duyck t_key key = cb->args[3]; 1975a88ee229SStephen Hemminger 19768be33e95SAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 1977a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 19788be33e95SAlexander Duyck cb->args[3] = key; 19798be33e95SAlexander Duyck cb->args[2] = count; 198019baf839SRobert Olsson return -1; 198119baf839SRobert Olsson } 1982d5ce8a0eSStephen Hemminger 198371d67e66SStephen Hemminger ++count; 19848be33e95SAlexander Duyck key = l->key + 1; 19858be33e95SAlexander Duyck 198671d67e66SStephen Hemminger memset(&cb->args[4], 0, 198771d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 19888be33e95SAlexander Duyck 19898be33e95SAlexander Duyck /* stop loop if key wrapped back to 0 */ 19908be33e95SAlexander Duyck if (key < l->key) 19918be33e95SAlexander Duyck break; 1992a88ee229SStephen Hemminger } 19938be33e95SAlexander Duyck 19948be33e95SAlexander Duyck cb->args[3] = key; 19958be33e95SAlexander Duyck cb->args[2] = count; 19968be33e95SAlexander Duyck 1997a88ee229SStephen Hemminger return skb->len; 1998a88ee229SStephen Hemminger } 199919baf839SRobert Olsson 20005348ba85SDavid S. Miller void __init fib_trie_init(void) 20017f9b8052SStephen Hemminger { 2002a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 2003a07f5f50SStephen Hemminger sizeof(struct fib_alias), 2004bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 2005bc3c8c1eSStephen Hemminger 2006bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 200741b489fdSAlexander Duyck LEAF_SIZE, 2008bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 20097f9b8052SStephen Hemminger } 201019baf839SRobert Olsson 20110ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) 201219baf839SRobert Olsson { 201319baf839SRobert Olsson struct fib_table *tb; 201419baf839SRobert Olsson struct trie *t; 20150ddcf43dSAlexander Duyck size_t sz = sizeof(*tb); 201619baf839SRobert Olsson 20170ddcf43dSAlexander Duyck if (!alias) 20180ddcf43dSAlexander Duyck sz += sizeof(struct trie); 20190ddcf43dSAlexander Duyck 20200ddcf43dSAlexander Duyck tb = kzalloc(sz, GFP_KERNEL); 202151456b29SIan Morris if (!tb) 202219baf839SRobert Olsson return NULL; 202319baf839SRobert Olsson 202419baf839SRobert Olsson tb->tb_id = id; 202521d8c49eSDavid S. Miller tb->tb_num_default = 0; 20260ddcf43dSAlexander Duyck tb->tb_data = (alias ? alias->__data : tb->__data); 20270ddcf43dSAlexander Duyck 20280ddcf43dSAlexander Duyck if (alias) 20290ddcf43dSAlexander Duyck return tb; 203019baf839SRobert Olsson 203119baf839SRobert Olsson t = (struct trie *) tb->tb_data; 203288bae714SAlexander Duyck t->kv[0].pos = KEYLENGTH; 203388bae714SAlexander Duyck t->kv[0].slen = KEYLENGTH; 20348274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 20358274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 20368274a97aSAlexander Duyck if (!t->stats) { 20378274a97aSAlexander Duyck kfree(tb); 20388274a97aSAlexander Duyck tb = NULL; 20398274a97aSAlexander Duyck } 20408274a97aSAlexander Duyck #endif 204119baf839SRobert Olsson 204219baf839SRobert Olsson return tb; 204319baf839SRobert Olsson } 204419baf839SRobert Olsson 2045cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2046cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2047cb7b593cSStephen Hemminger struct fib_trie_iter { 20481c340b2fSDenis V. Lunev struct seq_net_private p; 20493d3b2d25SStephen Hemminger struct fib_table *tb; 205035c6edacSAlexander Duyck struct key_vector *tnode; 2051a034ee3cSEric Dumazet unsigned int index; 2052a034ee3cSEric Dumazet unsigned int depth; 2053cb7b593cSStephen Hemminger }; 205419baf839SRobert Olsson 205535c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) 205619baf839SRobert Olsson { 205798293e8dSAlexander Duyck unsigned long cindex = iter->index; 205888bae714SAlexander Duyck struct key_vector *pn = iter->tnode; 205988bae714SAlexander Duyck t_key pkey; 20606640e697SEric W. Biederman 2061cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2062cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 206319baf839SRobert Olsson 206488bae714SAlexander Duyck while (!IS_TRIE(pn)) { 206588bae714SAlexander Duyck while (cindex < child_length(pn)) { 206688bae714SAlexander Duyck struct key_vector *n = get_child_rcu(pn, cindex++); 206788bae714SAlexander Duyck 206888bae714SAlexander Duyck if (!n) 206988bae714SAlexander Duyck continue; 207088bae714SAlexander Duyck 207119baf839SRobert Olsson if (IS_LEAF(n)) { 207288bae714SAlexander Duyck iter->tnode = pn; 207388bae714SAlexander Duyck iter->index = cindex; 207491b9a277SOlof Johansson } else { 2075cb7b593cSStephen Hemminger /* push down one level */ 2076adaf9816SAlexander Duyck iter->tnode = n; 2077cb7b593cSStephen Hemminger iter->index = 0; 2078cb7b593cSStephen Hemminger ++iter->depth; 207919baf839SRobert Olsson } 208088bae714SAlexander Duyck 2081cb7b593cSStephen Hemminger return n; 208219baf839SRobert Olsson } 208319baf839SRobert Olsson 2084cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 208588bae714SAlexander Duyck pkey = pn->key; 208688bae714SAlexander Duyck pn = node_parent_rcu(pn); 208788bae714SAlexander Duyck cindex = get_index(pkey, pn) + 1; 2088cb7b593cSStephen Hemminger --iter->depth; 2089cb7b593cSStephen Hemminger } 2090cb7b593cSStephen Hemminger 209188bae714SAlexander Duyck /* record root node so further searches know we are done */ 209288bae714SAlexander Duyck iter->tnode = pn; 209388bae714SAlexander Duyck iter->index = 0; 209488bae714SAlexander Duyck 2095cb7b593cSStephen Hemminger return NULL; 2096cb7b593cSStephen Hemminger } 2097cb7b593cSStephen Hemminger 209835c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2099cb7b593cSStephen Hemminger struct trie *t) 2100cb7b593cSStephen Hemminger { 2101f38b24c9SFiro Yang struct key_vector *n, *pn; 21025ddf0eb2SRobert Olsson 21035ddf0eb2SRobert Olsson if (!t) 21045ddf0eb2SRobert Olsson return NULL; 21055ddf0eb2SRobert Olsson 2106f38b24c9SFiro Yang pn = t->kv; 210788bae714SAlexander Duyck n = rcu_dereference(pn->tnode[0]); 21083d3b2d25SStephen Hemminger if (!n) 21095ddf0eb2SRobert Olsson return NULL; 2110cb7b593cSStephen Hemminger 21116640e697SEric W. Biederman if (IS_TNODE(n)) { 2112adaf9816SAlexander Duyck iter->tnode = n; 2113cb7b593cSStephen Hemminger iter->index = 0; 21141d25cd6cSRobert Olsson iter->depth = 1; 21156640e697SEric W. Biederman } else { 211688bae714SAlexander Duyck iter->tnode = pn; 21176640e697SEric W. Biederman iter->index = 0; 21186640e697SEric W. Biederman iter->depth = 0; 21196640e697SEric W. Biederman } 21203d3b2d25SStephen Hemminger 2121cb7b593cSStephen Hemminger return n; 2122cb7b593cSStephen Hemminger } 2123cb7b593cSStephen Hemminger 2124cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 212519baf839SRobert Olsson { 212635c6edacSAlexander Duyck struct key_vector *n; 2127cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2128cb7b593cSStephen Hemminger 2129cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 213019baf839SRobert Olsson 21312373ce1cSRobert Olsson rcu_read_lock(); 21323d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2133cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 213479e5ad2cSAlexander Duyck struct fib_alias *fa; 213593672292SStephen Hemminger 213619baf839SRobert Olsson s->leaves++; 2137cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2138cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2139cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 214093672292SStephen Hemminger 214179e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) 214293672292SStephen Hemminger ++s->prefixes; 214391b9a277SOlof Johansson } else { 214419baf839SRobert Olsson s->tnodes++; 2145adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 2146adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 21476e22d174SAlexander Duyck s->nullpointers += tn_info(n)->empty_children; 214819baf839SRobert Olsson } 214998293e8dSAlexander Duyck } 21502373ce1cSRobert Olsson rcu_read_unlock(); 215119baf839SRobert Olsson } 215219baf839SRobert Olsson 215319baf839SRobert Olsson /* 215419baf839SRobert Olsson * This outputs /proc/net/fib_triestats 215519baf839SRobert Olsson */ 2156cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 215719baf839SRobert Olsson { 2158a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 215919baf839SRobert Olsson 216019baf839SRobert Olsson if (stat->leaves) 216119baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 216219baf839SRobert Olsson else 216319baf839SRobert Olsson avdepth = 0; 216419baf839SRobert Olsson 2165a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2166a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2167cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2168cb7b593cSStephen Hemminger 2169cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 217041b489fdSAlexander Duyck bytes = LEAF_SIZE * stat->leaves; 217193672292SStephen Hemminger 217293672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 217379e5ad2cSAlexander Duyck bytes += sizeof(struct fib_alias) * stat->prefixes; 217493672292SStephen Hemminger 2175187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 217641b489fdSAlexander Duyck bytes += TNODE_SIZE(0) * stat->tnodes; 217719baf839SRobert Olsson 217806ef921dSRobert Olsson max = MAX_STAT_DEPTH; 217906ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 218019baf839SRobert Olsson max--; 218119baf839SRobert Olsson 2182cb7b593cSStephen Hemminger pointers = 0; 2183f585a991SJerry Snitselaar for (i = 1; i < max; i++) 218419baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2185187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 218619baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 218719baf839SRobert Olsson } 2188cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2189187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2190cb7b593cSStephen Hemminger 219135c6edacSAlexander Duyck bytes += sizeof(struct key_vector *) * pointers; 2192187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2193187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 219466a2f7fdSStephen Hemminger } 219519baf839SRobert Olsson 219619baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 219766a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 21988274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 219966a2f7fdSStephen Hemminger { 22008274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 22018274a97aSAlexander Duyck int cpu; 22028274a97aSAlexander Duyck 22038274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 22048274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 22058274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 22068274a97aSAlexander Duyck 22078274a97aSAlexander Duyck s.gets += pcpu->gets; 22088274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 22098274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 22108274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 22118274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 22128274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 22138274a97aSAlexander Duyck } 22148274a97aSAlexander Duyck 221566a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 22168274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 22178274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2218a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 22198274a97aSAlexander Duyck s.semantic_match_passed); 22208274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 22218274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 22228274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 222319baf839SRobert Olsson } 222466a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 222566a2f7fdSStephen Hemminger 22263d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2227d717a9a6SStephen Hemminger { 22283d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 22293d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 22303d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 22313d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 22323d3b2d25SStephen Hemminger else 22333d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2234d717a9a6SStephen Hemminger } 223519baf839SRobert Olsson 22363d3b2d25SStephen Hemminger 223719baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 223819baf839SRobert Olsson { 22391c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 22403d3b2d25SStephen Hemminger unsigned int h; 2241877a9bffSEric W. Biederman 2242d717a9a6SStephen Hemminger seq_printf(seq, 2243a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2244a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 224541b489fdSAlexander Duyck LEAF_SIZE, TNODE_SIZE(0)); 224619baf839SRobert Olsson 22473d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22483d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22493d3b2d25SStephen Hemminger struct fib_table *tb; 2250cb7b593cSStephen Hemminger 2251b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 22523d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 22533d3b2d25SStephen Hemminger struct trie_stat stat; 22543d3b2d25SStephen Hemminger 22553d3b2d25SStephen Hemminger if (!t) 22563d3b2d25SStephen Hemminger continue; 22573d3b2d25SStephen Hemminger 22583d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22593d3b2d25SStephen Hemminger 22603d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22613d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22623d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22638274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 22643d3b2d25SStephen Hemminger #endif 22653d3b2d25SStephen Hemminger } 22663d3b2d25SStephen Hemminger } 2267cb7b593cSStephen Hemminger 226819baf839SRobert Olsson return 0; 226919baf839SRobert Olsson } 227019baf839SRobert Olsson 227119baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 227219baf839SRobert Olsson { 2273de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 22741c340b2fSDenis V. Lunev } 22751c340b2fSDenis V. Lunev 22769a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 227719baf839SRobert Olsson .owner = THIS_MODULE, 227819baf839SRobert Olsson .open = fib_triestat_seq_open, 227919baf839SRobert Olsson .read = seq_read, 228019baf839SRobert Olsson .llseek = seq_lseek, 2281b6fcbdb4SPavel Emelyanov .release = single_release_net, 228219baf839SRobert Olsson }; 228319baf839SRobert Olsson 228435c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 228519baf839SRobert Olsson { 22861218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 22871218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2288cb7b593cSStephen Hemminger loff_t idx = 0; 22893d3b2d25SStephen Hemminger unsigned int h; 22903d3b2d25SStephen Hemminger 22913d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22923d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22933d3b2d25SStephen Hemminger struct fib_table *tb; 22943d3b2d25SStephen Hemminger 2295b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 229635c6edacSAlexander Duyck struct key_vector *n; 2297cb7b593cSStephen Hemminger 22983d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 22993d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 23003d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 23013d3b2d25SStephen Hemminger if (pos == idx++) { 23023d3b2d25SStephen Hemminger iter->tb = tb; 2303cb7b593cSStephen Hemminger return n; 230419baf839SRobert Olsson } 23053d3b2d25SStephen Hemminger } 23063d3b2d25SStephen Hemminger } 230719baf839SRobert Olsson 230819baf839SRobert Olsson return NULL; 230919baf839SRobert Olsson } 231019baf839SRobert Olsson 231119baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2312c95aaf9aSStephen Hemminger __acquires(RCU) 231319baf839SRobert Olsson { 2314cb7b593cSStephen Hemminger rcu_read_lock(); 23151218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 231619baf839SRobert Olsson } 231719baf839SRobert Olsson 231819baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 231919baf839SRobert Olsson { 2320cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 23211218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 23223d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 23233d3b2d25SStephen Hemminger struct hlist_node *tb_node; 23243d3b2d25SStephen Hemminger unsigned int h; 232535c6edacSAlexander Duyck struct key_vector *n; 2326cb7b593cSStephen Hemminger 232719baf839SRobert Olsson ++*pos; 23283d3b2d25SStephen Hemminger /* next node in same table */ 23293d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 23303d3b2d25SStephen Hemminger if (n) 23313d3b2d25SStephen Hemminger return n; 233291b9a277SOlof Johansson 23333d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 23343d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 23350a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 23363d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 23373d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23383d3b2d25SStephen Hemminger if (n) 23393d3b2d25SStephen Hemminger goto found; 23403d3b2d25SStephen Hemminger } 2341cb7b593cSStephen Hemminger 23423d3b2d25SStephen Hemminger /* new hash chain */ 23433d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 23443d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2345b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 23463d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23473d3b2d25SStephen Hemminger if (n) 23483d3b2d25SStephen Hemminger goto found; 23493d3b2d25SStephen Hemminger } 23503d3b2d25SStephen Hemminger } 2351cb7b593cSStephen Hemminger return NULL; 23523d3b2d25SStephen Hemminger 23533d3b2d25SStephen Hemminger found: 23543d3b2d25SStephen Hemminger iter->tb = tb; 23553d3b2d25SStephen Hemminger return n; 235619baf839SRobert Olsson } 235719baf839SRobert Olsson 235819baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2359c95aaf9aSStephen Hemminger __releases(RCU) 236019baf839SRobert Olsson { 2361cb7b593cSStephen Hemminger rcu_read_unlock(); 236219baf839SRobert Olsson } 236319baf839SRobert Olsson 2364cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2365cb7b593cSStephen Hemminger { 2366a034ee3cSEric Dumazet while (n-- > 0) 2367a034ee3cSEric Dumazet seq_puts(seq, " "); 2368cb7b593cSStephen Hemminger } 236919baf839SRobert Olsson 237028d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2371cb7b593cSStephen Hemminger { 2372cb7b593cSStephen Hemminger switch (s) { 2373cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2374cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2375cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2376cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2377cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2378cb7b593cSStephen Hemminger default: 237928d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2380cb7b593cSStephen Hemminger return buf; 2381cb7b593cSStephen Hemminger } 2382cb7b593cSStephen Hemminger } 2383cb7b593cSStephen Hemminger 238436cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2385cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2386cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2387cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2388cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2389cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2390cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2391cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2392cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2393cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2394cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2395cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2396cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2397cb7b593cSStephen Hemminger }; 2398cb7b593cSStephen Hemminger 2399a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2400cb7b593cSStephen Hemminger { 2401cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2402cb7b593cSStephen Hemminger return rtn_type_names[t]; 240328d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2404cb7b593cSStephen Hemminger return buf; 2405cb7b593cSStephen Hemminger } 2406cb7b593cSStephen Hemminger 2407cb7b593cSStephen Hemminger /* Pretty print the trie */ 240819baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 240919baf839SRobert Olsson { 2410cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 241135c6edacSAlexander Duyck struct key_vector *n = v; 241219baf839SRobert Olsson 241388bae714SAlexander Duyck if (IS_TRIE(node_parent_rcu(n))) 24143d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2415095b8501SRobert Olsson 2416095b8501SRobert Olsson if (IS_TNODE(n)) { 2417adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2418095b8501SRobert Olsson 24191d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2420e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2421e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 24226e22d174SAlexander Duyck tn_info(n)->full_children, 24236e22d174SAlexander Duyck tn_info(n)->empty_children); 2424cb7b593cSStephen Hemminger } else { 2425adaf9816SAlexander Duyck __be32 val = htonl(n->key); 242679e5ad2cSAlexander Duyck struct fib_alias *fa; 2427cb7b593cSStephen Hemminger 2428cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2429673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 243028d36e37SEric Dumazet 243179e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 243228d36e37SEric Dumazet char buf1[32], buf2[32]; 243328d36e37SEric Dumazet 2434cb7b593cSStephen Hemminger seq_indent(seq, iter->depth + 1); 24355786ec60SAlexander Duyck seq_printf(seq, " /%zu %s %s", 24369b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 243728d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 243837e826c5SDavid S. Miller fa->fa_info->fib_scope), 243928d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 244028d36e37SEric Dumazet fa->fa_type)); 2441cb7b593cSStephen Hemminger if (fa->fa_tos) 2442b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2443cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2444cb7b593cSStephen Hemminger } 2445cb7b593cSStephen Hemminger } 244619baf839SRobert Olsson 244719baf839SRobert Olsson return 0; 244819baf839SRobert Olsson } 244919baf839SRobert Olsson 2450f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 245119baf839SRobert Olsson .start = fib_trie_seq_start, 245219baf839SRobert Olsson .next = fib_trie_seq_next, 245319baf839SRobert Olsson .stop = fib_trie_seq_stop, 245419baf839SRobert Olsson .show = fib_trie_seq_show, 245519baf839SRobert Olsson }; 245619baf839SRobert Olsson 245719baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 245819baf839SRobert Olsson { 24591c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2460cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 246119baf839SRobert Olsson } 246219baf839SRobert Olsson 24639a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 246419baf839SRobert Olsson .owner = THIS_MODULE, 246519baf839SRobert Olsson .open = fib_trie_seq_open, 246619baf839SRobert Olsson .read = seq_read, 246719baf839SRobert Olsson .llseek = seq_lseek, 24681c340b2fSDenis V. Lunev .release = seq_release_net, 246919baf839SRobert Olsson }; 247019baf839SRobert Olsson 24718315f5d8SStephen Hemminger struct fib_route_iter { 24728315f5d8SStephen Hemminger struct seq_net_private p; 24738be33e95SAlexander Duyck struct fib_table *main_tb; 247435c6edacSAlexander Duyck struct key_vector *tnode; 24758315f5d8SStephen Hemminger loff_t pos; 24768315f5d8SStephen Hemminger t_key key; 24778315f5d8SStephen Hemminger }; 24788315f5d8SStephen Hemminger 247935c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, 248035c6edacSAlexander Duyck loff_t pos) 24818315f5d8SStephen Hemminger { 248235c6edacSAlexander Duyck struct key_vector *l, **tp = &iter->tnode; 24838be33e95SAlexander Duyck t_key key; 24848315f5d8SStephen Hemminger 2485fd0285a3SAlexander Duyck /* use cached location of previously found key */ 24868be33e95SAlexander Duyck if (iter->pos > 0 && pos >= iter->pos) { 24878be33e95SAlexander Duyck key = iter->key; 24888be33e95SAlexander Duyck } else { 2489fd0285a3SAlexander Duyck iter->pos = 1; 24908be33e95SAlexander Duyck key = 0; 24918315f5d8SStephen Hemminger } 24928315f5d8SStephen Hemminger 2493fd0285a3SAlexander Duyck pos -= iter->pos; 2494fd0285a3SAlexander Duyck 2495fd0285a3SAlexander Duyck while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { 24968be33e95SAlexander Duyck key = l->key + 1; 24978315f5d8SStephen Hemminger iter->pos++; 24988be33e95SAlexander Duyck l = NULL; 24998be33e95SAlexander Duyck 25008be33e95SAlexander Duyck /* handle unlikely case of a key wrap */ 25018be33e95SAlexander Duyck if (!key) 25028be33e95SAlexander Duyck break; 25038315f5d8SStephen Hemminger } 25048315f5d8SStephen Hemminger 25058315f5d8SStephen Hemminger if (l) 2506fd0285a3SAlexander Duyck iter->key = l->key; /* remember it */ 25078315f5d8SStephen Hemminger else 25088315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 25098315f5d8SStephen Hemminger 25108315f5d8SStephen Hemminger return l; 25118315f5d8SStephen Hemminger } 25128315f5d8SStephen Hemminger 25138315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 25148315f5d8SStephen Hemminger __acquires(RCU) 25158315f5d8SStephen Hemminger { 25168315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 25178315f5d8SStephen Hemminger struct fib_table *tb; 25188be33e95SAlexander Duyck struct trie *t; 25198315f5d8SStephen Hemminger 25208315f5d8SStephen Hemminger rcu_read_lock(); 25218be33e95SAlexander Duyck 25221218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 25238315f5d8SStephen Hemminger if (!tb) 25248315f5d8SStephen Hemminger return NULL; 25258315f5d8SStephen Hemminger 25268be33e95SAlexander Duyck iter->main_tb = tb; 252794d9f1c5SDavid Forster t = (struct trie *)tb->tb_data; 252894d9f1c5SDavid Forster iter->tnode = t->kv; 25298be33e95SAlexander Duyck 25308be33e95SAlexander Duyck if (*pos != 0) 25318be33e95SAlexander Duyck return fib_route_get_idx(iter, *pos); 25328be33e95SAlexander Duyck 25338be33e95SAlexander Duyck iter->pos = 0; 2534fd0285a3SAlexander Duyck iter->key = KEY_MAX; 25358be33e95SAlexander Duyck 25368315f5d8SStephen Hemminger return SEQ_START_TOKEN; 25378315f5d8SStephen Hemminger } 25388315f5d8SStephen Hemminger 25398315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25408315f5d8SStephen Hemminger { 25418315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 254235c6edacSAlexander Duyck struct key_vector *l = NULL; 2543fd0285a3SAlexander Duyck t_key key = iter->key + 1; 25448315f5d8SStephen Hemminger 25458315f5d8SStephen Hemminger ++*pos; 25468be33e95SAlexander Duyck 25478be33e95SAlexander Duyck /* only allow key of 0 for start of sequence */ 25488be33e95SAlexander Duyck if ((v == SEQ_START_TOKEN) || key) 25498be33e95SAlexander Duyck l = leaf_walk_rcu(&iter->tnode, key); 25508be33e95SAlexander Duyck 25518be33e95SAlexander Duyck if (l) { 2552fd0285a3SAlexander Duyck iter->key = l->key; 25538315f5d8SStephen Hemminger iter->pos++; 25548be33e95SAlexander Duyck } else { 25558be33e95SAlexander Duyck iter->pos = 0; 25568315f5d8SStephen Hemminger } 25578315f5d8SStephen Hemminger 25588315f5d8SStephen Hemminger return l; 25598315f5d8SStephen Hemminger } 25608315f5d8SStephen Hemminger 25618315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 25628315f5d8SStephen Hemminger __releases(RCU) 25638315f5d8SStephen Hemminger { 25648315f5d8SStephen Hemminger rcu_read_unlock(); 25658315f5d8SStephen Hemminger } 25668315f5d8SStephen Hemminger 2567a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2568cb7b593cSStephen Hemminger { 2569a034ee3cSEric Dumazet unsigned int flags = 0; 2570cb7b593cSStephen Hemminger 2571a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2572a034ee3cSEric Dumazet flags = RTF_REJECT; 2573cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2574cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 257532ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2576cb7b593cSStephen Hemminger flags |= RTF_HOST; 2577cb7b593cSStephen Hemminger flags |= RTF_UP; 2578cb7b593cSStephen Hemminger return flags; 2579cb7b593cSStephen Hemminger } 2580cb7b593cSStephen Hemminger 2581cb7b593cSStephen Hemminger /* 2582cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2583cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2584cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2585cb7b593cSStephen Hemminger * legacy utilities 2586cb7b593cSStephen Hemminger */ 2587cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2588cb7b593cSStephen Hemminger { 2589654eff45SAlexander Duyck struct fib_route_iter *iter = seq->private; 2590654eff45SAlexander Duyck struct fib_table *tb = iter->main_tb; 259179e5ad2cSAlexander Duyck struct fib_alias *fa; 259235c6edacSAlexander Duyck struct key_vector *l = v; 25939b6ebad5SAlexander Duyck __be32 prefix; 2594cb7b593cSStephen Hemminger 2595cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2596cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2597cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2598cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2599cb7b593cSStephen Hemminger return 0; 2600cb7b593cSStephen Hemminger } 2601cb7b593cSStephen Hemminger 26029b6ebad5SAlexander Duyck prefix = htonl(l->key); 26039b6ebad5SAlexander Duyck 260479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 26051371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 26069b6ebad5SAlexander Duyck __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); 2607a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2608cb7b593cSStephen Hemminger 260979e5ad2cSAlexander Duyck if ((fa->fa_type == RTN_BROADCAST) || 261079e5ad2cSAlexander Duyck (fa->fa_type == RTN_MULTICAST)) 2611cb7b593cSStephen Hemminger continue; 2612cb7b593cSStephen Hemminger 2613654eff45SAlexander Duyck if (fa->tb_id != tb->tb_id) 2614654eff45SAlexander Duyck continue; 2615654eff45SAlexander Duyck 2616652586dfSTetsuo Handa seq_setwidth(seq, 127); 2617652586dfSTetsuo Handa 2618cb7b593cSStephen Hemminger if (fi) 26195e659e4cSPavel Emelyanov seq_printf(seq, 26205e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2621652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2622cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2623cb7b593cSStephen Hemminger prefix, 2624cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2625cb7b593cSStephen Hemminger fi->fib_priority, 2626cb7b593cSStephen Hemminger mask, 2627a07f5f50SStephen Hemminger (fi->fib_advmss ? 2628a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2629cb7b593cSStephen Hemminger fi->fib_window, 2630652586dfSTetsuo Handa fi->fib_rtt >> 3); 2631cb7b593cSStephen Hemminger else 26325e659e4cSPavel Emelyanov seq_printf(seq, 26335e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2634652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2635cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2636652586dfSTetsuo Handa mask, 0, 0, 0); 2637cb7b593cSStephen Hemminger 2638652586dfSTetsuo Handa seq_pad(seq, '\n'); 2639cb7b593cSStephen Hemminger } 2640cb7b593cSStephen Hemminger 2641cb7b593cSStephen Hemminger return 0; 2642cb7b593cSStephen Hemminger } 2643cb7b593cSStephen Hemminger 2644f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 26458315f5d8SStephen Hemminger .start = fib_route_seq_start, 26468315f5d8SStephen Hemminger .next = fib_route_seq_next, 26478315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2648cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2649cb7b593cSStephen Hemminger }; 2650cb7b593cSStephen Hemminger 2651cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2652cb7b593cSStephen Hemminger { 26531c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 26548315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2655cb7b593cSStephen Hemminger } 2656cb7b593cSStephen Hemminger 26579a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2658cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2659cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2660cb7b593cSStephen Hemminger .read = seq_read, 2661cb7b593cSStephen Hemminger .llseek = seq_lseek, 26621c340b2fSDenis V. Lunev .release = seq_release_net, 2663cb7b593cSStephen Hemminger }; 2664cb7b593cSStephen Hemminger 266561a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 266619baf839SRobert Olsson { 2667d4beaa66SGao feng if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) 2668cb7b593cSStephen Hemminger goto out1; 2669cb7b593cSStephen Hemminger 2670d4beaa66SGao feng if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, 267161a02653SDenis V. Lunev &fib_triestat_fops)) 2672cb7b593cSStephen Hemminger goto out2; 2673cb7b593cSStephen Hemminger 2674d4beaa66SGao feng if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) 2675cb7b593cSStephen Hemminger goto out3; 2676cb7b593cSStephen Hemminger 267719baf839SRobert Olsson return 0; 2678cb7b593cSStephen Hemminger 2679cb7b593cSStephen Hemminger out3: 2680ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2681cb7b593cSStephen Hemminger out2: 2682ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2683cb7b593cSStephen Hemminger out1: 2684cb7b593cSStephen Hemminger return -ENOMEM; 268519baf839SRobert Olsson } 268619baf839SRobert Olsson 268761a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 268819baf839SRobert Olsson { 2689ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2690ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2691ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 269219baf839SRobert Olsson } 269319baf839SRobert Olsson 269419baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2695