119baf839SRobert Olsson /* 219baf839SRobert Olsson * This program is free software; you can redistribute it and/or 319baf839SRobert Olsson * modify it under the terms of the GNU General Public License 419baf839SRobert Olsson * as published by the Free Software Foundation; either version 519baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 619baf839SRobert Olsson * 719baf839SRobert Olsson * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 819baf839SRobert Olsson * & Swedish University of Agricultural Sciences. 919baf839SRobert Olsson * 1019baf839SRobert Olsson * Jens Laas <jens.laas@data.slu.se> Swedish University of 1119baf839SRobert Olsson * Agricultural Sciences. 1219baf839SRobert Olsson * 1319baf839SRobert Olsson * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 1419baf839SRobert Olsson * 1525985edcSLucas De Marchi * This work is based on the LPC-trie which is originally described in: 1619baf839SRobert Olsson * 1719baf839SRobert Olsson * An experimental study of compression methods for dynamic tries 1819baf839SRobert Olsson * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19631dd1a8SJustin P. Mattock * http://www.csc.kth.se/~snilsson/software/dyntrie2/ 2019baf839SRobert Olsson * 2119baf839SRobert Olsson * 2219baf839SRobert Olsson * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 2319baf839SRobert Olsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 2419baf839SRobert Olsson * 2519baf839SRobert Olsson * 2619baf839SRobert Olsson * Code from fib_hash has been reused which includes the following header: 2719baf839SRobert Olsson * 2819baf839SRobert Olsson * 2919baf839SRobert Olsson * INET An implementation of the TCP/IP protocol suite for the LINUX 3019baf839SRobert Olsson * operating system. INET is implemented using the BSD Socket 3119baf839SRobert Olsson * interface as the means of communication with the user level. 3219baf839SRobert Olsson * 3319baf839SRobert Olsson * IPv4 FIB: lookup engine and maintenance routines. 3419baf839SRobert Olsson * 3519baf839SRobert Olsson * 3619baf839SRobert Olsson * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 3719baf839SRobert Olsson * 3819baf839SRobert Olsson * This program is free software; you can redistribute it and/or 3919baf839SRobert Olsson * modify it under the terms of the GNU General Public License 4019baf839SRobert Olsson * as published by the Free Software Foundation; either version 4119baf839SRobert Olsson * 2 of the License, or (at your option) any later version. 42fd966255SRobert Olsson * 43fd966255SRobert Olsson * Substantial contributions to this work comes from: 44fd966255SRobert Olsson * 45fd966255SRobert Olsson * David S. Miller, <davem@davemloft.net> 46fd966255SRobert Olsson * Stephen Hemminger <shemminger@osdl.org> 47fd966255SRobert Olsson * Paul E. McKenney <paulmck@us.ibm.com> 48fd966255SRobert Olsson * Patrick McHardy <kaber@trash.net> 4919baf839SRobert Olsson */ 5019baf839SRobert Olsson 5180b71b80SJens Låås #define VERSION "0.409" 5219baf839SRobert Olsson 5319baf839SRobert Olsson #include <asm/uaccess.h> 541977f032SJiri Slaby #include <linux/bitops.h> 5519baf839SRobert Olsson #include <linux/types.h> 5619baf839SRobert Olsson #include <linux/kernel.h> 5719baf839SRobert Olsson #include <linux/mm.h> 5819baf839SRobert Olsson #include <linux/string.h> 5919baf839SRobert Olsson #include <linux/socket.h> 6019baf839SRobert Olsson #include <linux/sockios.h> 6119baf839SRobert Olsson #include <linux/errno.h> 6219baf839SRobert Olsson #include <linux/in.h> 6319baf839SRobert Olsson #include <linux/inet.h> 64cd8787abSStephen Hemminger #include <linux/inetdevice.h> 6519baf839SRobert Olsson #include <linux/netdevice.h> 6619baf839SRobert Olsson #include <linux/if_arp.h> 6719baf839SRobert Olsson #include <linux/proc_fs.h> 682373ce1cSRobert Olsson #include <linux/rcupdate.h> 6919baf839SRobert Olsson #include <linux/skbuff.h> 7019baf839SRobert Olsson #include <linux/netlink.h> 7119baf839SRobert Olsson #include <linux/init.h> 7219baf839SRobert Olsson #include <linux/list.h> 735a0e3ad6STejun Heo #include <linux/slab.h> 74bc3b2d7fSPaul Gortmaker #include <linux/export.h> 75ffa915d0SDavid S. Miller #include <linux/vmalloc.h> 76b90eb754SJiri Pirko #include <linux/notifier.h> 77457c4cbcSEric W. Biederman #include <net/net_namespace.h> 7819baf839SRobert Olsson #include <net/ip.h> 7919baf839SRobert Olsson #include <net/protocol.h> 8019baf839SRobert Olsson #include <net/route.h> 8119baf839SRobert Olsson #include <net/tcp.h> 8219baf839SRobert Olsson #include <net/sock.h> 8319baf839SRobert Olsson #include <net/ip_fib.h> 848e05fd71SScott Feldman #include <net/switchdev.h> 85f6d3c192SDavid Ahern #include <trace/events/fib.h> 8619baf839SRobert Olsson #include "fib_lookup.h" 8719baf839SRobert Olsson 88b90eb754SJiri Pirko static BLOCKING_NOTIFIER_HEAD(fib_chain); 89b90eb754SJiri Pirko 90b90eb754SJiri Pirko int register_fib_notifier(struct notifier_block *nb) 91b90eb754SJiri Pirko { 92b90eb754SJiri Pirko return blocking_notifier_chain_register(&fib_chain, nb); 93b90eb754SJiri Pirko } 94b90eb754SJiri Pirko EXPORT_SYMBOL(register_fib_notifier); 95b90eb754SJiri Pirko 96b90eb754SJiri Pirko int unregister_fib_notifier(struct notifier_block *nb) 97b90eb754SJiri Pirko { 98b90eb754SJiri Pirko return blocking_notifier_chain_unregister(&fib_chain, nb); 99b90eb754SJiri Pirko } 100b90eb754SJiri Pirko EXPORT_SYMBOL(unregister_fib_notifier); 101b90eb754SJiri Pirko 102b90eb754SJiri Pirko int call_fib_notifiers(struct net *net, enum fib_event_type event_type, 103b90eb754SJiri Pirko struct fib_notifier_info *info) 104b90eb754SJiri Pirko { 105b90eb754SJiri Pirko info->net = net; 106b90eb754SJiri Pirko return blocking_notifier_call_chain(&fib_chain, event_type, info); 107b90eb754SJiri Pirko } 108b90eb754SJiri Pirko 109b90eb754SJiri Pirko static int call_fib_entry_notifiers(struct net *net, 110b90eb754SJiri Pirko enum fib_event_type event_type, u32 dst, 111b90eb754SJiri Pirko int dst_len, struct fib_info *fi, 112b90eb754SJiri Pirko u8 tos, u8 type, u32 tb_id, u32 nlflags) 113b90eb754SJiri Pirko { 114b90eb754SJiri Pirko struct fib_entry_notifier_info info = { 115b90eb754SJiri Pirko .dst = dst, 116b90eb754SJiri Pirko .dst_len = dst_len, 117b90eb754SJiri Pirko .fi = fi, 118b90eb754SJiri Pirko .tos = tos, 119b90eb754SJiri Pirko .type = type, 120b90eb754SJiri Pirko .tb_id = tb_id, 121b90eb754SJiri Pirko .nlflags = nlflags, 122b90eb754SJiri Pirko }; 123b90eb754SJiri Pirko return call_fib_notifiers(net, event_type, &info.info); 124b90eb754SJiri Pirko } 125b90eb754SJiri Pirko 12606ef921dSRobert Olsson #define MAX_STAT_DEPTH 32 12719baf839SRobert Olsson 12819baf839SRobert Olsson #define KEYLENGTH (8*sizeof(t_key)) 12995f60ea3SAlexander Duyck #define KEY_MAX ((t_key)~0) 13019baf839SRobert Olsson 13119baf839SRobert Olsson typedef unsigned int t_key; 13219baf839SRobert Olsson 13388bae714SAlexander Duyck #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) 13464c9b6fbSAlexander Duyck #define IS_TNODE(n) ((n)->bits) 13564c9b6fbSAlexander Duyck #define IS_LEAF(n) (!(n)->bits) 1362373ce1cSRobert Olsson 13735c6edacSAlexander Duyck struct key_vector { 13841b489fdSAlexander Duyck t_key key; 13941b489fdSAlexander Duyck unsigned char pos; /* 2log(KEYLENGTH) bits needed */ 14041b489fdSAlexander Duyck unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 14141b489fdSAlexander Duyck unsigned char slen; 14241b489fdSAlexander Duyck union { 14341b489fdSAlexander Duyck /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ 14479e5ad2cSAlexander Duyck struct hlist_head leaf; 14541b489fdSAlexander Duyck /* This array is valid if (pos | bits) > 0 (TNODE) */ 14635c6edacSAlexander Duyck struct key_vector __rcu *tnode[0]; 14719baf839SRobert Olsson }; 148adaf9816SAlexander Duyck }; 14919baf839SRobert Olsson 150dc35dbedSAlexander Duyck struct tnode { 15156ca2adfSAlexander Duyck struct rcu_head rcu; 1526e22d174SAlexander Duyck t_key empty_children; /* KEYLENGTH bits needed */ 1536e22d174SAlexander Duyck t_key full_children; /* KEYLENGTH bits needed */ 154f23e59fbSAlexander Duyck struct key_vector __rcu *parent; 155dc35dbedSAlexander Duyck struct key_vector kv[1]; 15656ca2adfSAlexander Duyck #define tn_bits kv[0].bits 157dc35dbedSAlexander Duyck }; 158dc35dbedSAlexander Duyck 159dc35dbedSAlexander Duyck #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) 16041b489fdSAlexander Duyck #define LEAF_SIZE TNODE_SIZE(1) 16141b489fdSAlexander Duyck 16219baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 16319baf839SRobert Olsson struct trie_use_stats { 16419baf839SRobert Olsson unsigned int gets; 16519baf839SRobert Olsson unsigned int backtrack; 16619baf839SRobert Olsson unsigned int semantic_match_passed; 16719baf839SRobert Olsson unsigned int semantic_match_miss; 16819baf839SRobert Olsson unsigned int null_node_hit; 1692f36895aSRobert Olsson unsigned int resize_node_skipped; 17019baf839SRobert Olsson }; 17119baf839SRobert Olsson #endif 17219baf839SRobert Olsson 17319baf839SRobert Olsson struct trie_stat { 17419baf839SRobert Olsson unsigned int totdepth; 17519baf839SRobert Olsson unsigned int maxdepth; 17619baf839SRobert Olsson unsigned int tnodes; 17719baf839SRobert Olsson unsigned int leaves; 17819baf839SRobert Olsson unsigned int nullpointers; 17993672292SStephen Hemminger unsigned int prefixes; 18006ef921dSRobert Olsson unsigned int nodesizes[MAX_STAT_DEPTH]; 18119baf839SRobert Olsson }; 18219baf839SRobert Olsson 18319baf839SRobert Olsson struct trie { 18488bae714SAlexander Duyck struct key_vector kv[1]; 18519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 1868274a97aSAlexander Duyck struct trie_use_stats __percpu *stats; 18719baf839SRobert Olsson #endif 18819baf839SRobert Olsson }; 18919baf839SRobert Olsson 19088bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn); 191c3059477SJarek Poplawski static size_t tnode_free_size; 192c3059477SJarek Poplawski 193c3059477SJarek Poplawski /* 194c3059477SJarek Poplawski * synchronize_rcu after call_rcu for that many pages; it should be especially 195c3059477SJarek Poplawski * useful before resizing the root node with PREEMPT_NONE configs; the value was 196c3059477SJarek Poplawski * obtained experimentally, aiming to avoid visible slowdown. 197c3059477SJarek Poplawski */ 198c3059477SJarek Poplawski static const int sync_pages = 128; 19919baf839SRobert Olsson 200e18b890bSChristoph Lameter static struct kmem_cache *fn_alias_kmem __read_mostly; 201bc3c8c1eSStephen Hemminger static struct kmem_cache *trie_leaf_kmem __read_mostly; 20219baf839SRobert Olsson 20356ca2adfSAlexander Duyck static inline struct tnode *tn_info(struct key_vector *kv) 20456ca2adfSAlexander Duyck { 20556ca2adfSAlexander Duyck return container_of(kv, struct tnode, kv[0]); 20656ca2adfSAlexander Duyck } 20756ca2adfSAlexander Duyck 20864c9b6fbSAlexander Duyck /* caller must hold RTNL */ 209f23e59fbSAlexander Duyck #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) 210754baf8dSAlexander Duyck #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) 2110a5c0475SEric Dumazet 21264c9b6fbSAlexander Duyck /* caller must hold RCU read lock or RTNL */ 213f23e59fbSAlexander Duyck #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) 214754baf8dSAlexander Duyck #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) 2150a5c0475SEric Dumazet 21664c9b6fbSAlexander Duyck /* wrapper for rcu_assign_pointer */ 21735c6edacSAlexander Duyck static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) 21806801916SStephen Hemminger { 219adaf9816SAlexander Duyck if (n) 220f23e59fbSAlexander Duyck rcu_assign_pointer(tn_info(n)->parent, tp); 22164c9b6fbSAlexander Duyck } 22264c9b6fbSAlexander Duyck 223f23e59fbSAlexander Duyck #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) 22464c9b6fbSAlexander Duyck 22564c9b6fbSAlexander Duyck /* This provides us with the number of children in this node, in the case of a 22664c9b6fbSAlexander Duyck * leaf this will return 0 meaning none of the children are accessible. 22764c9b6fbSAlexander Duyck */ 2282e1ac88aSAlexander Duyck static inline unsigned long child_length(const struct key_vector *tn) 22964c9b6fbSAlexander Duyck { 23064c9b6fbSAlexander Duyck return (1ul << tn->bits) & ~(1ul); 23106801916SStephen Hemminger } 2322373ce1cSRobert Olsson 23388bae714SAlexander Duyck #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) 23488bae714SAlexander Duyck 2352e1ac88aSAlexander Duyck static inline unsigned long get_index(t_key key, struct key_vector *kv) 2362e1ac88aSAlexander Duyck { 2372e1ac88aSAlexander Duyck unsigned long index = key ^ kv->key; 2382e1ac88aSAlexander Duyck 23988bae714SAlexander Duyck if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) 24088bae714SAlexander Duyck return 0; 24188bae714SAlexander Duyck 2422e1ac88aSAlexander Duyck return index >> kv->pos; 2432e1ac88aSAlexander Duyck } 2442e1ac88aSAlexander Duyck 245e9b44019SAlexander Duyck /* To understand this stuff, an understanding of keys and all their bits is 246e9b44019SAlexander Duyck * necessary. Every node in the trie has a key associated with it, but not 247e9b44019SAlexander Duyck * all of the bits in that key are significant. 248e9b44019SAlexander Duyck * 249e9b44019SAlexander Duyck * Consider a node 'n' and its parent 'tp'. 250e9b44019SAlexander Duyck * 251e9b44019SAlexander Duyck * If n is a leaf, every bit in its key is significant. Its presence is 252e9b44019SAlexander Duyck * necessitated by path compression, since during a tree traversal (when 253e9b44019SAlexander Duyck * searching for a leaf - unless we are doing an insertion) we will completely 254e9b44019SAlexander Duyck * ignore all skipped bits we encounter. Thus we need to verify, at the end of 255e9b44019SAlexander Duyck * a potentially successful search, that we have indeed been walking the 256e9b44019SAlexander Duyck * correct key path. 257e9b44019SAlexander Duyck * 258e9b44019SAlexander Duyck * Note that we can never "miss" the correct key in the tree if present by 259e9b44019SAlexander Duyck * following the wrong path. Path compression ensures that segments of the key 260e9b44019SAlexander Duyck * that are the same for all keys with a given prefix are skipped, but the 261e9b44019SAlexander Duyck * skipped part *is* identical for each node in the subtrie below the skipped 262e9b44019SAlexander Duyck * bit! trie_insert() in this implementation takes care of that. 263e9b44019SAlexander Duyck * 264e9b44019SAlexander Duyck * if n is an internal node - a 'tnode' here, the various parts of its key 265e9b44019SAlexander Duyck * have many different meanings. 266e9b44019SAlexander Duyck * 267e9b44019SAlexander Duyck * Example: 268e9b44019SAlexander Duyck * _________________________________________________________________ 269e9b44019SAlexander Duyck * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 270e9b44019SAlexander Duyck * ----------------------------------------------------------------- 271e9b44019SAlexander Duyck * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 272e9b44019SAlexander Duyck * 273e9b44019SAlexander Duyck * _________________________________________________________________ 274e9b44019SAlexander Duyck * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 275e9b44019SAlexander Duyck * ----------------------------------------------------------------- 276e9b44019SAlexander Duyck * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 277e9b44019SAlexander Duyck * 278e9b44019SAlexander Duyck * tp->pos = 22 279e9b44019SAlexander Duyck * tp->bits = 3 280e9b44019SAlexander Duyck * n->pos = 13 281e9b44019SAlexander Duyck * n->bits = 4 282e9b44019SAlexander Duyck * 283e9b44019SAlexander Duyck * First, let's just ignore the bits that come before the parent tp, that is 284e9b44019SAlexander Duyck * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this 285e9b44019SAlexander Duyck * point we do not use them for anything. 286e9b44019SAlexander Duyck * 287e9b44019SAlexander Duyck * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 288e9b44019SAlexander Duyck * index into the parent's child array. That is, they will be used to find 289e9b44019SAlexander Duyck * 'n' among tp's children. 290e9b44019SAlexander Duyck * 29198a384ecSXunlei Pang * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits 292e9b44019SAlexander Duyck * for the node n. 293e9b44019SAlexander Duyck * 294e9b44019SAlexander Duyck * All the bits we have seen so far are significant to the node n. The rest 295e9b44019SAlexander Duyck * of the bits are really not needed or indeed known in n->key. 296e9b44019SAlexander Duyck * 297e9b44019SAlexander Duyck * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 298e9b44019SAlexander Duyck * n's child array, and will of course be different for each child. 299e9b44019SAlexander Duyck * 30098a384ecSXunlei Pang * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown 301e9b44019SAlexander Duyck * at this point. 30219baf839SRobert Olsson */ 30319baf839SRobert Olsson 304f5026fabSDenis V. Lunev static const int halve_threshold = 25; 305f5026fabSDenis V. Lunev static const int inflate_threshold = 50; 306345aa031SJarek Poplawski static const int halve_threshold_root = 15; 30780b71b80SJens Låås static const int inflate_threshold_root = 30; 3082373ce1cSRobert Olsson 3092373ce1cSRobert Olsson static void __alias_free_mem(struct rcu_head *head) 3102373ce1cSRobert Olsson { 3112373ce1cSRobert Olsson struct fib_alias *fa = container_of(head, struct fib_alias, rcu); 3122373ce1cSRobert Olsson kmem_cache_free(fn_alias_kmem, fa); 3132373ce1cSRobert Olsson } 3142373ce1cSRobert Olsson 3152373ce1cSRobert Olsson static inline void alias_free_mem_rcu(struct fib_alias *fa) 3162373ce1cSRobert Olsson { 3172373ce1cSRobert Olsson call_rcu(&fa->rcu, __alias_free_mem); 3182373ce1cSRobert Olsson } 3192373ce1cSRobert Olsson 32037fd30f2SAlexander Duyck #define TNODE_KMALLOC_MAX \ 32135c6edacSAlexander Duyck ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 3221de3d87bSAlexander Duyck #define TNODE_VMALLOC_MAX \ 32335c6edacSAlexander Duyck ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) 32437fd30f2SAlexander Duyck 32537fd30f2SAlexander Duyck static void __node_free_rcu(struct rcu_head *head) 3262373ce1cSRobert Olsson { 32756ca2adfSAlexander Duyck struct tnode *n = container_of(head, struct tnode, rcu); 32837fd30f2SAlexander Duyck 32956ca2adfSAlexander Duyck if (!n->tn_bits) 33037fd30f2SAlexander Duyck kmem_cache_free(trie_leaf_kmem, n); 33137fd30f2SAlexander Duyck else 3321d5cfdb0STetsuo Handa kvfree(n); 3332373ce1cSRobert Olsson } 3342373ce1cSRobert Olsson 33556ca2adfSAlexander Duyck #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 336387a5487SStephen Hemminger 337dc35dbedSAlexander Duyck static struct tnode *tnode_alloc(int bits) 3382373ce1cSRobert Olsson { 3391de3d87bSAlexander Duyck size_t size; 3401de3d87bSAlexander Duyck 3411de3d87bSAlexander Duyck /* verify bits is within bounds */ 3421de3d87bSAlexander Duyck if (bits > TNODE_VMALLOC_MAX) 3431de3d87bSAlexander Duyck return NULL; 3441de3d87bSAlexander Duyck 3451de3d87bSAlexander Duyck /* determine size and verify it is non-zero and didn't overflow */ 3461de3d87bSAlexander Duyck size = TNODE_SIZE(1ul << bits); 3471de3d87bSAlexander Duyck 3482373ce1cSRobert Olsson if (size <= PAGE_SIZE) 3498d965444SEric Dumazet return kzalloc(size, GFP_KERNEL); 35015be75cdSStephen Hemminger else 3517a1c8e5aSEric Dumazet return vzalloc(size); 35215be75cdSStephen Hemminger } 3532373ce1cSRobert Olsson 35435c6edacSAlexander Duyck static inline void empty_child_inc(struct key_vector *n) 35595f60ea3SAlexander Duyck { 3566e22d174SAlexander Duyck ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; 35795f60ea3SAlexander Duyck } 35895f60ea3SAlexander Duyck 35935c6edacSAlexander Duyck static inline void empty_child_dec(struct key_vector *n) 36095f60ea3SAlexander Duyck { 3616e22d174SAlexander Duyck tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; 36295f60ea3SAlexander Duyck } 36395f60ea3SAlexander Duyck 36435c6edacSAlexander Duyck static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) 36519baf839SRobert Olsson { 366f38b24c9SFiro Yang struct key_vector *l; 367f38b24c9SFiro Yang struct tnode *kv; 368dc35dbedSAlexander Duyck 369f38b24c9SFiro Yang kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 370dc35dbedSAlexander Duyck if (!kv) 371dc35dbedSAlexander Duyck return NULL; 372dc35dbedSAlexander Duyck 373dc35dbedSAlexander Duyck /* initialize key vector */ 374f38b24c9SFiro Yang l = kv->kv; 37564c9b6fbSAlexander Duyck l->key = key; 376e9b44019SAlexander Duyck l->pos = 0; 37764c9b6fbSAlexander Duyck l->bits = 0; 378dc35dbedSAlexander Duyck l->slen = fa->fa_slen; 37964c9b6fbSAlexander Duyck 380d5d6487cSAlexander Duyck /* link leaf to fib alias */ 38179e5ad2cSAlexander Duyck INIT_HLIST_HEAD(&l->leaf); 382d5d6487cSAlexander Duyck hlist_add_head(&fa->fa_list, &l->leaf); 383dc35dbedSAlexander Duyck 38419baf839SRobert Olsson return l; 38519baf839SRobert Olsson } 38619baf839SRobert Olsson 38735c6edacSAlexander Duyck static struct key_vector *tnode_new(t_key key, int pos, int bits) 38819baf839SRobert Olsson { 38964c9b6fbSAlexander Duyck unsigned int shift = pos + bits; 390f38b24c9SFiro Yang struct key_vector *tn; 391f38b24c9SFiro Yang struct tnode *tnode; 39264c9b6fbSAlexander Duyck 39364c9b6fbSAlexander Duyck /* verify bits and pos their msb bits clear and values are valid */ 39464c9b6fbSAlexander Duyck BUG_ON(!bits || (shift > KEYLENGTH)); 39519baf839SRobert Olsson 396f38b24c9SFiro Yang tnode = tnode_alloc(bits); 397dc35dbedSAlexander Duyck if (!tnode) 398dc35dbedSAlexander Duyck return NULL; 399dc35dbedSAlexander Duyck 400f38b24c9SFiro Yang pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), 401f38b24c9SFiro Yang sizeof(struct key_vector *) << bits); 402f38b24c9SFiro Yang 40395f60ea3SAlexander Duyck if (bits == KEYLENGTH) 4046e22d174SAlexander Duyck tnode->full_children = 1; 40595f60ea3SAlexander Duyck else 4066e22d174SAlexander Duyck tnode->empty_children = 1ul << bits; 407c877efb2SStephen Hemminger 408f38b24c9SFiro Yang tn = tnode->kv; 409dc35dbedSAlexander Duyck tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; 410dc35dbedSAlexander Duyck tn->pos = pos; 411dc35dbedSAlexander Duyck tn->bits = bits; 412dc35dbedSAlexander Duyck tn->slen = pos; 413dc35dbedSAlexander Duyck 41419baf839SRobert Olsson return tn; 41519baf839SRobert Olsson } 41619baf839SRobert Olsson 417e9b44019SAlexander Duyck /* Check whether a tnode 'n' is "full", i.e. it is an internal node 41819baf839SRobert Olsson * and no bits are skipped. See discussion in dyntree paper p. 6 41919baf839SRobert Olsson */ 42035c6edacSAlexander Duyck static inline int tnode_full(struct key_vector *tn, struct key_vector *n) 42119baf839SRobert Olsson { 422e9b44019SAlexander Duyck return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); 42319baf839SRobert Olsson } 42419baf839SRobert Olsson 425ff181ed8SAlexander Duyck /* Add a child at position i overwriting the old value. 42619baf839SRobert Olsson * Update the value of full_children and empty_children. 42719baf839SRobert Olsson */ 42835c6edacSAlexander Duyck static void put_child(struct key_vector *tn, unsigned long i, 42935c6edacSAlexander Duyck struct key_vector *n) 43019baf839SRobert Olsson { 431754baf8dSAlexander Duyck struct key_vector *chi = get_child(tn, i); 432ff181ed8SAlexander Duyck int isfull, wasfull; 43319baf839SRobert Olsson 4342e1ac88aSAlexander Duyck BUG_ON(i >= child_length(tn)); 4350c7770c7SStephen Hemminger 43695f60ea3SAlexander Duyck /* update emptyChildren, overflow into fullChildren */ 43700db4124SIan Morris if (!n && chi) 43895f60ea3SAlexander Duyck empty_child_inc(tn); 43900db4124SIan Morris if (n && !chi) 44095f60ea3SAlexander Duyck empty_child_dec(tn); 44119baf839SRobert Olsson 44219baf839SRobert Olsson /* update fullChildren */ 44319baf839SRobert Olsson wasfull = tnode_full(tn, chi); 44419baf839SRobert Olsson isfull = tnode_full(tn, n); 445ff181ed8SAlexander Duyck 44619baf839SRobert Olsson if (wasfull && !isfull) 4476e22d174SAlexander Duyck tn_info(tn)->full_children--; 44819baf839SRobert Olsson else if (!wasfull && isfull) 4496e22d174SAlexander Duyck tn_info(tn)->full_children++; 45091b9a277SOlof Johansson 4515405afd1SAlexander Duyck if (n && (tn->slen < n->slen)) 4525405afd1SAlexander Duyck tn->slen = n->slen; 4535405afd1SAlexander Duyck 45441b489fdSAlexander Duyck rcu_assign_pointer(tn->tnode[i], n); 45519baf839SRobert Olsson } 45619baf839SRobert Olsson 45735c6edacSAlexander Duyck static void update_children(struct key_vector *tn) 45869fa57b1SAlexander Duyck { 45969fa57b1SAlexander Duyck unsigned long i; 46069fa57b1SAlexander Duyck 46169fa57b1SAlexander Duyck /* update all of the child parent pointers */ 4622e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 463754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 46469fa57b1SAlexander Duyck 46569fa57b1SAlexander Duyck if (!inode) 46669fa57b1SAlexander Duyck continue; 46769fa57b1SAlexander Duyck 46869fa57b1SAlexander Duyck /* Either update the children of a tnode that 46969fa57b1SAlexander Duyck * already belongs to us or update the child 47069fa57b1SAlexander Duyck * to point to ourselves. 47169fa57b1SAlexander Duyck */ 47269fa57b1SAlexander Duyck if (node_parent(inode) == tn) 47369fa57b1SAlexander Duyck update_children(inode); 47469fa57b1SAlexander Duyck else 47569fa57b1SAlexander Duyck node_set_parent(inode, tn); 47669fa57b1SAlexander Duyck } 47769fa57b1SAlexander Duyck } 47869fa57b1SAlexander Duyck 47988bae714SAlexander Duyck static inline void put_child_root(struct key_vector *tp, t_key key, 48088bae714SAlexander Duyck struct key_vector *n) 481836a0123SAlexander Duyck { 48288bae714SAlexander Duyck if (IS_TRIE(tp)) 48388bae714SAlexander Duyck rcu_assign_pointer(tp->tnode[0], n); 484836a0123SAlexander Duyck else 48588bae714SAlexander Duyck put_child(tp, get_index(key, tp), n); 486836a0123SAlexander Duyck } 487836a0123SAlexander Duyck 48835c6edacSAlexander Duyck static inline void tnode_free_init(struct key_vector *tn) 4890a5c0475SEric Dumazet { 49056ca2adfSAlexander Duyck tn_info(tn)->rcu.next = NULL; 4910a5c0475SEric Dumazet } 492fc86a93bSAlexander Duyck 49335c6edacSAlexander Duyck static inline void tnode_free_append(struct key_vector *tn, 49435c6edacSAlexander Duyck struct key_vector *n) 495fc86a93bSAlexander Duyck { 49656ca2adfSAlexander Duyck tn_info(n)->rcu.next = tn_info(tn)->rcu.next; 49756ca2adfSAlexander Duyck tn_info(tn)->rcu.next = &tn_info(n)->rcu; 498fc86a93bSAlexander Duyck } 499fc86a93bSAlexander Duyck 50035c6edacSAlexander Duyck static void tnode_free(struct key_vector *tn) 501fc86a93bSAlexander Duyck { 50256ca2adfSAlexander Duyck struct callback_head *head = &tn_info(tn)->rcu; 503fc86a93bSAlexander Duyck 504fc86a93bSAlexander Duyck while (head) { 505fc86a93bSAlexander Duyck head = head->next; 50641b489fdSAlexander Duyck tnode_free_size += TNODE_SIZE(1ul << tn->bits); 50737fd30f2SAlexander Duyck node_free(tn); 508fc86a93bSAlexander Duyck 50956ca2adfSAlexander Duyck tn = container_of(head, struct tnode, rcu)->kv; 510fc86a93bSAlexander Duyck } 511fc86a93bSAlexander Duyck 512fc86a93bSAlexander Duyck if (tnode_free_size >= PAGE_SIZE * sync_pages) { 513fc86a93bSAlexander Duyck tnode_free_size = 0; 514fc86a93bSAlexander Duyck synchronize_rcu(); 515fc86a93bSAlexander Duyck } 5160a5c0475SEric Dumazet } 5170a5c0475SEric Dumazet 51888bae714SAlexander Duyck static struct key_vector *replace(struct trie *t, 51935c6edacSAlexander Duyck struct key_vector *oldtnode, 52035c6edacSAlexander Duyck struct key_vector *tn) 52169fa57b1SAlexander Duyck { 52235c6edacSAlexander Duyck struct key_vector *tp = node_parent(oldtnode); 52369fa57b1SAlexander Duyck unsigned long i; 52469fa57b1SAlexander Duyck 52569fa57b1SAlexander Duyck /* setup the parent pointer out of and back into this node */ 52669fa57b1SAlexander Duyck NODE_INIT_PARENT(tn, tp); 52788bae714SAlexander Duyck put_child_root(tp, tn->key, tn); 52869fa57b1SAlexander Duyck 52969fa57b1SAlexander Duyck /* update all of the child parent pointers */ 53069fa57b1SAlexander Duyck update_children(tn); 53169fa57b1SAlexander Duyck 53269fa57b1SAlexander Duyck /* all pointers should be clean so we are done */ 53369fa57b1SAlexander Duyck tnode_free(oldtnode); 53469fa57b1SAlexander Duyck 53569fa57b1SAlexander Duyck /* resize children now that oldtnode is freed */ 5362e1ac88aSAlexander Duyck for (i = child_length(tn); i;) { 537754baf8dSAlexander Duyck struct key_vector *inode = get_child(tn, --i); 53869fa57b1SAlexander Duyck 53969fa57b1SAlexander Duyck /* resize child node */ 54069fa57b1SAlexander Duyck if (tnode_full(tn, inode)) 54188bae714SAlexander Duyck tn = resize(t, inode); 54269fa57b1SAlexander Duyck } 5438d8e810cSAlexander Duyck 54488bae714SAlexander Duyck return tp; 54569fa57b1SAlexander Duyck } 54669fa57b1SAlexander Duyck 54788bae714SAlexander Duyck static struct key_vector *inflate(struct trie *t, 54835c6edacSAlexander Duyck struct key_vector *oldtnode) 54919baf839SRobert Olsson { 55035c6edacSAlexander Duyck struct key_vector *tn; 55169fa57b1SAlexander Duyck unsigned long i; 552e9b44019SAlexander Duyck t_key m; 55319baf839SRobert Olsson 5540c7770c7SStephen Hemminger pr_debug("In inflate\n"); 55519baf839SRobert Olsson 556e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); 5572f80b3c8SRobert Olsson if (!tn) 5588d8e810cSAlexander Duyck goto notnode; 5592f36895aSRobert Olsson 56069fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 56169fa57b1SAlexander Duyck tnode_free_init(oldtnode); 56269fa57b1SAlexander Duyck 56312c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 56412c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 56512c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 56612c081a5SAlexander Duyck * nodes. 5672f36895aSRobert Olsson */ 5682e1ac88aSAlexander Duyck for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { 569754baf8dSAlexander Duyck struct key_vector *inode = get_child(oldtnode, --i); 57035c6edacSAlexander Duyck struct key_vector *node0, *node1; 57169fa57b1SAlexander Duyck unsigned long j, k; 57219baf839SRobert Olsson 57319baf839SRobert Olsson /* An empty child */ 57451456b29SIan Morris if (!inode) 57519baf839SRobert Olsson continue; 57619baf839SRobert Olsson 57719baf839SRobert Olsson /* A leaf or an internal node with skipped bits */ 578adaf9816SAlexander Duyck if (!tnode_full(oldtnode, inode)) { 579e9b44019SAlexander Duyck put_child(tn, get_index(inode->key, tn), inode); 58019baf839SRobert Olsson continue; 58119baf839SRobert Olsson } 58219baf839SRobert Olsson 58369fa57b1SAlexander Duyck /* drop the node in the old tnode free list */ 58469fa57b1SAlexander Duyck tnode_free_append(oldtnode, inode); 58569fa57b1SAlexander Duyck 58619baf839SRobert Olsson /* An internal node with two children */ 58719baf839SRobert Olsson if (inode->bits == 1) { 588754baf8dSAlexander Duyck put_child(tn, 2 * i + 1, get_child(inode, 1)); 589754baf8dSAlexander Duyck put_child(tn, 2 * i, get_child(inode, 0)); 59091b9a277SOlof Johansson continue; 59119baf839SRobert Olsson } 59219baf839SRobert Olsson 59319baf839SRobert Olsson /* We will replace this node 'inode' with two new 59412c081a5SAlexander Duyck * ones, 'node0' and 'node1', each with half of the 59519baf839SRobert Olsson * original children. The two new nodes will have 59619baf839SRobert Olsson * a position one bit further down the key and this 59719baf839SRobert Olsson * means that the "significant" part of their keys 59819baf839SRobert Olsson * (see the discussion near the top of this file) 59919baf839SRobert Olsson * will differ by one bit, which will be "0" in 60012c081a5SAlexander Duyck * node0's key and "1" in node1's key. Since we are 60119baf839SRobert Olsson * moving the key position by one step, the bit that 60219baf839SRobert Olsson * we are moving away from - the bit at position 60312c081a5SAlexander Duyck * (tn->pos) - is the one that will differ between 60412c081a5SAlexander Duyck * node0 and node1. So... we synthesize that bit in the 60519baf839SRobert Olsson * two new keys. 60619baf839SRobert Olsson */ 60712c081a5SAlexander Duyck node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); 60812c081a5SAlexander Duyck if (!node1) 60912c081a5SAlexander Duyck goto nomem; 61069fa57b1SAlexander Duyck node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); 61119baf839SRobert Olsson 61269fa57b1SAlexander Duyck tnode_free_append(tn, node1); 61312c081a5SAlexander Duyck if (!node0) 61412c081a5SAlexander Duyck goto nomem; 61512c081a5SAlexander Duyck tnode_free_append(tn, node0); 6162f36895aSRobert Olsson 61712c081a5SAlexander Duyck /* populate child pointers in new nodes */ 6182e1ac88aSAlexander Duyck for (k = child_length(inode), j = k / 2; j;) { 619754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 620754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 621754baf8dSAlexander Duyck put_child(node1, --j, get_child(inode, --k)); 622754baf8dSAlexander Duyck put_child(node0, j, get_child(inode, j)); 62319baf839SRobert Olsson } 624ff181ed8SAlexander Duyck 62512c081a5SAlexander Duyck /* link new nodes to parent */ 62612c081a5SAlexander Duyck NODE_INIT_PARENT(node1, tn); 62712c081a5SAlexander Duyck NODE_INIT_PARENT(node0, tn); 62812c081a5SAlexander Duyck 62912c081a5SAlexander Duyck /* link parent to nodes */ 63012c081a5SAlexander Duyck put_child(tn, 2 * i + 1, node1); 63112c081a5SAlexander Duyck put_child(tn, 2 * i, node0); 63212c081a5SAlexander Duyck } 63312c081a5SAlexander Duyck 63469fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6358d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 636ff181ed8SAlexander Duyck nomem: 637fc86a93bSAlexander Duyck /* all pointers should be clean so we are done */ 638fc86a93bSAlexander Duyck tnode_free(tn); 6398d8e810cSAlexander Duyck notnode: 6408d8e810cSAlexander Duyck return NULL; 641ff181ed8SAlexander Duyck } 642ff181ed8SAlexander Duyck 64388bae714SAlexander Duyck static struct key_vector *halve(struct trie *t, 64435c6edacSAlexander Duyck struct key_vector *oldtnode) 64519baf839SRobert Olsson { 64635c6edacSAlexander Duyck struct key_vector *tn; 64712c081a5SAlexander Duyck unsigned long i; 64819baf839SRobert Olsson 6490c7770c7SStephen Hemminger pr_debug("In halve\n"); 65019baf839SRobert Olsson 651e9b44019SAlexander Duyck tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); 6522f80b3c8SRobert Olsson if (!tn) 6538d8e810cSAlexander Duyck goto notnode; 6542f36895aSRobert Olsson 65569fa57b1SAlexander Duyck /* prepare oldtnode to be freed */ 65669fa57b1SAlexander Duyck tnode_free_init(oldtnode); 65769fa57b1SAlexander Duyck 65812c081a5SAlexander Duyck /* Assemble all of the pointers in our cluster, in this case that 65912c081a5SAlexander Duyck * represents all of the pointers out of our allocated nodes that 66012c081a5SAlexander Duyck * point to existing tnodes and the links between our allocated 66112c081a5SAlexander Duyck * nodes. 6622f36895aSRobert Olsson */ 6632e1ac88aSAlexander Duyck for (i = child_length(oldtnode); i;) { 664754baf8dSAlexander Duyck struct key_vector *node1 = get_child(oldtnode, --i); 665754baf8dSAlexander Duyck struct key_vector *node0 = get_child(oldtnode, --i); 66635c6edacSAlexander Duyck struct key_vector *inode; 6672f36895aSRobert Olsson 66812c081a5SAlexander Duyck /* At least one of the children is empty */ 66912c081a5SAlexander Duyck if (!node1 || !node0) { 67012c081a5SAlexander Duyck put_child(tn, i / 2, node1 ? : node0); 67112c081a5SAlexander Duyck continue; 67212c081a5SAlexander Duyck } 6732f36895aSRobert Olsson 6742f36895aSRobert Olsson /* Two nonempty children */ 67512c081a5SAlexander Duyck inode = tnode_new(node0->key, oldtnode->pos, 1); 6768d8e810cSAlexander Duyck if (!inode) 6778d8e810cSAlexander Duyck goto nomem; 67812c081a5SAlexander Duyck tnode_free_append(tn, inode); 6792f80b3c8SRobert Olsson 68012c081a5SAlexander Duyck /* initialize pointers out of node */ 68112c081a5SAlexander Duyck put_child(inode, 1, node1); 68212c081a5SAlexander Duyck put_child(inode, 0, node0); 68312c081a5SAlexander Duyck NODE_INIT_PARENT(inode, tn); 68412c081a5SAlexander Duyck 68512c081a5SAlexander Duyck /* link parent to node */ 68612c081a5SAlexander Duyck put_child(tn, i / 2, inode); 6872f36895aSRobert Olsson } 6882f36895aSRobert Olsson 68969fa57b1SAlexander Duyck /* setup the parent pointers into and out of this node */ 6908d8e810cSAlexander Duyck return replace(t, oldtnode, tn); 6918d8e810cSAlexander Duyck nomem: 6928d8e810cSAlexander Duyck /* all pointers should be clean so we are done */ 6938d8e810cSAlexander Duyck tnode_free(tn); 6948d8e810cSAlexander Duyck notnode: 6958d8e810cSAlexander Duyck return NULL; 6962f80b3c8SRobert Olsson } 69719baf839SRobert Olsson 69888bae714SAlexander Duyck static struct key_vector *collapse(struct trie *t, 69988bae714SAlexander Duyck struct key_vector *oldtnode) 70095f60ea3SAlexander Duyck { 70135c6edacSAlexander Duyck struct key_vector *n, *tp; 70295f60ea3SAlexander Duyck unsigned long i; 70395f60ea3SAlexander Duyck 70495f60ea3SAlexander Duyck /* scan the tnode looking for that one child that might still exist */ 7052e1ac88aSAlexander Duyck for (n = NULL, i = child_length(oldtnode); !n && i;) 706754baf8dSAlexander Duyck n = get_child(oldtnode, --i); 70795f60ea3SAlexander Duyck 70895f60ea3SAlexander Duyck /* compress one level */ 70995f60ea3SAlexander Duyck tp = node_parent(oldtnode); 71088bae714SAlexander Duyck put_child_root(tp, oldtnode->key, n); 71195f60ea3SAlexander Duyck node_set_parent(n, tp); 71295f60ea3SAlexander Duyck 71395f60ea3SAlexander Duyck /* drop dead node */ 71495f60ea3SAlexander Duyck node_free(oldtnode); 71588bae714SAlexander Duyck 71688bae714SAlexander Duyck return tp; 71795f60ea3SAlexander Duyck } 71895f60ea3SAlexander Duyck 71935c6edacSAlexander Duyck static unsigned char update_suffix(struct key_vector *tn) 7205405afd1SAlexander Duyck { 7215405afd1SAlexander Duyck unsigned char slen = tn->pos; 7225405afd1SAlexander Duyck unsigned long stride, i; 7235405afd1SAlexander Duyck 7245405afd1SAlexander Duyck /* search though the list of children looking for nodes that might 7255405afd1SAlexander Duyck * have a suffix greater than the one we currently have. This is 7265405afd1SAlexander Duyck * why we start with a stride of 2 since a stride of 1 would 7275405afd1SAlexander Duyck * represent the nodes with suffix length equal to tn->pos 7285405afd1SAlexander Duyck */ 7292e1ac88aSAlexander Duyck for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { 730754baf8dSAlexander Duyck struct key_vector *n = get_child(tn, i); 7315405afd1SAlexander Duyck 7325405afd1SAlexander Duyck if (!n || (n->slen <= slen)) 7335405afd1SAlexander Duyck continue; 7345405afd1SAlexander Duyck 7355405afd1SAlexander Duyck /* update stride and slen based on new value */ 7365405afd1SAlexander Duyck stride <<= (n->slen - slen); 7375405afd1SAlexander Duyck slen = n->slen; 7385405afd1SAlexander Duyck i &= ~(stride - 1); 7395405afd1SAlexander Duyck 7405405afd1SAlexander Duyck /* if slen covers all but the last bit we can stop here 7415405afd1SAlexander Duyck * there will be nothing longer than that since only node 7425405afd1SAlexander Duyck * 0 and 1 << (bits - 1) could have that as their suffix 7435405afd1SAlexander Duyck * length. 7445405afd1SAlexander Duyck */ 7455405afd1SAlexander Duyck if ((slen + 1) >= (tn->pos + tn->bits)) 7465405afd1SAlexander Duyck break; 7475405afd1SAlexander Duyck } 7485405afd1SAlexander Duyck 7495405afd1SAlexander Duyck tn->slen = slen; 7505405afd1SAlexander Duyck 7515405afd1SAlexander Duyck return slen; 7525405afd1SAlexander Duyck } 7535405afd1SAlexander Duyck 754f05a4819SAlexander Duyck /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of 755cf3637bbSAlexander Duyck * the Helsinki University of Technology and Matti Tikkanen of Nokia 756cf3637bbSAlexander Duyck * Telecommunications, page 6: 757cf3637bbSAlexander Duyck * "A node is doubled if the ratio of non-empty children to all 758cf3637bbSAlexander Duyck * children in the *doubled* node is at least 'high'." 759cf3637bbSAlexander Duyck * 760cf3637bbSAlexander Duyck * 'high' in this instance is the variable 'inflate_threshold'. It 761cf3637bbSAlexander Duyck * is expressed as a percentage, so we multiply it with 7622e1ac88aSAlexander Duyck * child_length() and instead of multiplying by 2 (since the 763cf3637bbSAlexander Duyck * child array will be doubled by inflate()) and multiplying 764cf3637bbSAlexander Duyck * the left-hand side by 100 (to handle the percentage thing) we 765cf3637bbSAlexander Duyck * multiply the left-hand side by 50. 766cf3637bbSAlexander Duyck * 7672e1ac88aSAlexander Duyck * The left-hand side may look a bit weird: child_length(tn) 768cf3637bbSAlexander Duyck * - tn->empty_children is of course the number of non-null children 769cf3637bbSAlexander Duyck * in the current node. tn->full_children is the number of "full" 770cf3637bbSAlexander Duyck * children, that is non-null tnodes with a skip value of 0. 771cf3637bbSAlexander Duyck * All of those will be doubled in the resulting inflated tnode, so 772cf3637bbSAlexander Duyck * we just count them one extra time here. 773cf3637bbSAlexander Duyck * 774cf3637bbSAlexander Duyck * A clearer way to write this would be: 775cf3637bbSAlexander Duyck * 776cf3637bbSAlexander Duyck * to_be_doubled = tn->full_children; 7772e1ac88aSAlexander Duyck * not_to_be_doubled = child_length(tn) - tn->empty_children - 778cf3637bbSAlexander Duyck * tn->full_children; 779cf3637bbSAlexander Duyck * 7802e1ac88aSAlexander Duyck * new_child_length = child_length(tn) * 2; 781cf3637bbSAlexander Duyck * 782cf3637bbSAlexander Duyck * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 783cf3637bbSAlexander Duyck * new_child_length; 784cf3637bbSAlexander Duyck * if (new_fill_factor >= inflate_threshold) 785cf3637bbSAlexander Duyck * 786cf3637bbSAlexander Duyck * ...and so on, tho it would mess up the while () loop. 787cf3637bbSAlexander Duyck * 788cf3637bbSAlexander Duyck * anyway, 789cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 790cf3637bbSAlexander Duyck * inflate_threshold 791cf3637bbSAlexander Duyck * 792cf3637bbSAlexander Duyck * avoid a division: 793cf3637bbSAlexander Duyck * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 794cf3637bbSAlexander Duyck * inflate_threshold * new_child_length 795cf3637bbSAlexander Duyck * 796cf3637bbSAlexander Duyck * expand not_to_be_doubled and to_be_doubled, and shorten: 7972e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 798cf3637bbSAlexander Duyck * tn->full_children) >= inflate_threshold * new_child_length 799cf3637bbSAlexander Duyck * 800cf3637bbSAlexander Duyck * expand new_child_length: 8012e1ac88aSAlexander Duyck * 100 * (child_length(tn) - tn->empty_children + 802cf3637bbSAlexander Duyck * tn->full_children) >= 8032e1ac88aSAlexander Duyck * inflate_threshold * child_length(tn) * 2 804cf3637bbSAlexander Duyck * 805cf3637bbSAlexander Duyck * shorten again: 8062e1ac88aSAlexander Duyck * 50 * (tn->full_children + child_length(tn) - 807cf3637bbSAlexander Duyck * tn->empty_children) >= inflate_threshold * 8082e1ac88aSAlexander Duyck * child_length(tn) 809cf3637bbSAlexander Duyck * 810cf3637bbSAlexander Duyck */ 81135c6edacSAlexander Duyck static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) 812f05a4819SAlexander Duyck { 8132e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 814f05a4819SAlexander Duyck unsigned long threshold = used; 815cf3637bbSAlexander Duyck 816cf3637bbSAlexander Duyck /* Keep root node larger */ 81788bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; 8186e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 8196e22d174SAlexander Duyck used += tn_info(tn)->full_children; 820cf3637bbSAlexander Duyck 82195f60ea3SAlexander Duyck /* if bits == KEYLENGTH then pos = 0, and will fail below */ 82295f60ea3SAlexander Duyck 82395f60ea3SAlexander Duyck return (used > 1) && tn->pos && ((50 * used) >= threshold); 824cf3637bbSAlexander Duyck } 825cf3637bbSAlexander Duyck 82635c6edacSAlexander Duyck static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) 827f05a4819SAlexander Duyck { 8282e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 829f05a4819SAlexander Duyck unsigned long threshold = used; 830cf3637bbSAlexander Duyck 831f05a4819SAlexander Duyck /* Keep root node larger */ 83288bae714SAlexander Duyck threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; 8336e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 834f05a4819SAlexander Duyck 83595f60ea3SAlexander Duyck /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ 83695f60ea3SAlexander Duyck 83795f60ea3SAlexander Duyck return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); 83895f60ea3SAlexander Duyck } 83995f60ea3SAlexander Duyck 84035c6edacSAlexander Duyck static inline bool should_collapse(struct key_vector *tn) 84195f60ea3SAlexander Duyck { 8422e1ac88aSAlexander Duyck unsigned long used = child_length(tn); 84395f60ea3SAlexander Duyck 8446e22d174SAlexander Duyck used -= tn_info(tn)->empty_children; 84595f60ea3SAlexander Duyck 84695f60ea3SAlexander Duyck /* account for bits == KEYLENGTH case */ 8476e22d174SAlexander Duyck if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) 84895f60ea3SAlexander Duyck used -= KEY_MAX; 84995f60ea3SAlexander Duyck 85095f60ea3SAlexander Duyck /* One child or none, time to drop us from the trie */ 85195f60ea3SAlexander Duyck return used < 2; 852f05a4819SAlexander Duyck } 853f05a4819SAlexander Duyck 854f05a4819SAlexander Duyck #define MAX_WORK 10 85588bae714SAlexander Duyck static struct key_vector *resize(struct trie *t, struct key_vector *tn) 856f05a4819SAlexander Duyck { 8578d8e810cSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8588d8e810cSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 8598d8e810cSAlexander Duyck #endif 86035c6edacSAlexander Duyck struct key_vector *tp = node_parent(tn); 86188bae714SAlexander Duyck unsigned long cindex = get_index(tn->key, tp); 862a80e89d4SAlexander Duyck int max_work = MAX_WORK; 863f05a4819SAlexander Duyck 864f05a4819SAlexander Duyck pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 865f05a4819SAlexander Duyck tn, inflate_threshold, halve_threshold); 866f05a4819SAlexander Duyck 867ff181ed8SAlexander Duyck /* track the tnode via the pointer from the parent instead of 868ff181ed8SAlexander Duyck * doing it ourselves. This way we can let RCU fully do its 869ff181ed8SAlexander Duyck * thing without us interfering 870ff181ed8SAlexander Duyck */ 87188bae714SAlexander Duyck BUG_ON(tn != get_child(tp, cindex)); 872ff181ed8SAlexander Duyck 873f05a4819SAlexander Duyck /* Double as long as the resulting node has a number of 874f05a4819SAlexander Duyck * nonempty nodes that are above the threshold. 875f05a4819SAlexander Duyck */ 876b6f15f82SAlexander Duyck while (should_inflate(tp, tn) && max_work) { 87788bae714SAlexander Duyck tp = inflate(t, tn); 87888bae714SAlexander Duyck if (!tp) { 879cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 8808d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 881cf3637bbSAlexander Duyck #endif 882cf3637bbSAlexander Duyck break; 883cf3637bbSAlexander Duyck } 884ff181ed8SAlexander Duyck 885b6f15f82SAlexander Duyck max_work--; 88688bae714SAlexander Duyck tn = get_child(tp, cindex); 887cf3637bbSAlexander Duyck } 888cf3637bbSAlexander Duyck 889b6f15f82SAlexander Duyck /* update parent in case inflate failed */ 890b6f15f82SAlexander Duyck tp = node_parent(tn); 891b6f15f82SAlexander Duyck 892cf3637bbSAlexander Duyck /* Return if at least one inflate is run */ 893cf3637bbSAlexander Duyck if (max_work != MAX_WORK) 894b6f15f82SAlexander Duyck return tp; 895cf3637bbSAlexander Duyck 896f05a4819SAlexander Duyck /* Halve as long as the number of empty children in this 897cf3637bbSAlexander Duyck * node is above threshold. 898cf3637bbSAlexander Duyck */ 899b6f15f82SAlexander Duyck while (should_halve(tp, tn) && max_work) { 90088bae714SAlexander Duyck tp = halve(t, tn); 90188bae714SAlexander Duyck if (!tp) { 902cf3637bbSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 9038d8e810cSAlexander Duyck this_cpu_inc(stats->resize_node_skipped); 904cf3637bbSAlexander Duyck #endif 905cf3637bbSAlexander Duyck break; 906cf3637bbSAlexander Duyck } 907cf3637bbSAlexander Duyck 908b6f15f82SAlexander Duyck max_work--; 90988bae714SAlexander Duyck tn = get_child(tp, cindex); 910ff181ed8SAlexander Duyck } 911cf3637bbSAlexander Duyck 912cf3637bbSAlexander Duyck /* Only one child remains */ 91388bae714SAlexander Duyck if (should_collapse(tn)) 91488bae714SAlexander Duyck return collapse(t, tn); 91588bae714SAlexander Duyck 916b6f15f82SAlexander Duyck /* update parent in case halve failed */ 91788bae714SAlexander Duyck tp = node_parent(tn); 9185405afd1SAlexander Duyck 9195405afd1SAlexander Duyck /* Return if at least one deflate was run */ 9205405afd1SAlexander Duyck if (max_work != MAX_WORK) 92188bae714SAlexander Duyck return tp; 9225405afd1SAlexander Duyck 9235405afd1SAlexander Duyck /* push the suffix length to the parent node */ 9245405afd1SAlexander Duyck if (tn->slen > tn->pos) { 9255405afd1SAlexander Duyck unsigned char slen = update_suffix(tn); 9265405afd1SAlexander Duyck 92788bae714SAlexander Duyck if (slen > tp->slen) 9285405afd1SAlexander Duyck tp->slen = slen; 929cf3637bbSAlexander Duyck } 9308d8e810cSAlexander Duyck 93188bae714SAlexander Duyck return tp; 932cf3637bbSAlexander Duyck } 933cf3637bbSAlexander Duyck 93435c6edacSAlexander Duyck static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) 93519baf839SRobert Olsson { 93688bae714SAlexander Duyck while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { 9375405afd1SAlexander Duyck if (update_suffix(tp) > l->slen) 9385405afd1SAlexander Duyck break; 9395405afd1SAlexander Duyck tp = node_parent(tp); 9405405afd1SAlexander Duyck } 9415405afd1SAlexander Duyck } 9425405afd1SAlexander Duyck 94335c6edacSAlexander Duyck static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) 9445405afd1SAlexander Duyck { 9455405afd1SAlexander Duyck /* if this is a new leaf then tn will be NULL and we can sort 9465405afd1SAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 9475405afd1SAlexander Duyck */ 94888bae714SAlexander Duyck while (tn->slen < l->slen) { 9495405afd1SAlexander Duyck tn->slen = l->slen; 9505405afd1SAlexander Duyck tn = node_parent(tn); 9515405afd1SAlexander Duyck } 9525405afd1SAlexander Duyck } 9535405afd1SAlexander Duyck 9542373ce1cSRobert Olsson /* rcu_read_lock needs to be hold by caller from readside */ 95535c6edacSAlexander Duyck static struct key_vector *fib_find_node(struct trie *t, 95635c6edacSAlexander Duyck struct key_vector **tp, u32 key) 95719baf839SRobert Olsson { 95888bae714SAlexander Duyck struct key_vector *pn, *n = t->kv; 95988bae714SAlexander Duyck unsigned long index = 0; 96019baf839SRobert Olsson 96188bae714SAlexander Duyck do { 96288bae714SAlexander Duyck pn = n; 96388bae714SAlexander Duyck n = get_child_rcu(n, index); 96488bae714SAlexander Duyck 96588bae714SAlexander Duyck if (!n) 96688bae714SAlexander Duyck break; 96788bae714SAlexander Duyck 96888bae714SAlexander Duyck index = get_cindex(key, n); 96919baf839SRobert Olsson 970939afb06SAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 971939afb06SAlexander Duyck * checks into a single check. The prefix consists of the 972939afb06SAlexander Duyck * prefix plus zeros for the bits in the cindex. The index 973939afb06SAlexander Duyck * is the difference between the key and this value. From 974939afb06SAlexander Duyck * this we can actually derive several pieces of data. 975d4a975e8SAlexander Duyck * if (index >= (1ul << bits)) 976939afb06SAlexander Duyck * we have a mismatch in skip bits and failed 977b3832117SAlexander Duyck * else 978b3832117SAlexander Duyck * we know the value is cindex 979d4a975e8SAlexander Duyck * 980d4a975e8SAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 981d4a975e8SAlexander Duyck * fact that we can only allocate a node with 32 bits if a 982d4a975e8SAlexander Duyck * long is greater than 32 bits. 983939afb06SAlexander Duyck */ 984d4a975e8SAlexander Duyck if (index >= (1ul << n->bits)) { 985d4a975e8SAlexander Duyck n = NULL; 986d4a975e8SAlexander Duyck break; 987d4a975e8SAlexander Duyck } 988939afb06SAlexander Duyck 98988bae714SAlexander Duyck /* keep searching until we find a perfect match leaf or NULL */ 99088bae714SAlexander Duyck } while (IS_TNODE(n)); 991939afb06SAlexander Duyck 99235c6edacSAlexander Duyck *tp = pn; 993d4a975e8SAlexander Duyck 994939afb06SAlexander Duyck return n; 99519baf839SRobert Olsson } 99619baf839SRobert Olsson 99702525368SAlexander Duyck /* Return the first fib alias matching TOS with 99802525368SAlexander Duyck * priority less than or equal to PRIO. 99902525368SAlexander Duyck */ 100079e5ad2cSAlexander Duyck static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, 10010b65bd97SAlexander Duyck u8 tos, u32 prio, u32 tb_id) 100202525368SAlexander Duyck { 100302525368SAlexander Duyck struct fib_alias *fa; 100402525368SAlexander Duyck 100502525368SAlexander Duyck if (!fah) 100602525368SAlexander Duyck return NULL; 100702525368SAlexander Duyck 100856315f9eSAlexander Duyck hlist_for_each_entry(fa, fah, fa_list) { 100979e5ad2cSAlexander Duyck if (fa->fa_slen < slen) 101079e5ad2cSAlexander Duyck continue; 101179e5ad2cSAlexander Duyck if (fa->fa_slen != slen) 101279e5ad2cSAlexander Duyck break; 10130b65bd97SAlexander Duyck if (fa->tb_id > tb_id) 10140b65bd97SAlexander Duyck continue; 10150b65bd97SAlexander Duyck if (fa->tb_id != tb_id) 10160b65bd97SAlexander Duyck break; 101702525368SAlexander Duyck if (fa->fa_tos > tos) 101802525368SAlexander Duyck continue; 101902525368SAlexander Duyck if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 102002525368SAlexander Duyck return fa; 102102525368SAlexander Duyck } 102202525368SAlexander Duyck 102302525368SAlexander Duyck return NULL; 102402525368SAlexander Duyck } 102502525368SAlexander Duyck 102635c6edacSAlexander Duyck static void trie_rebalance(struct trie *t, struct key_vector *tn) 102719baf839SRobert Olsson { 102888bae714SAlexander Duyck while (!IS_TRIE(tn)) 102988bae714SAlexander Duyck tn = resize(t, tn); 103019baf839SRobert Olsson } 103119baf839SRobert Olsson 103235c6edacSAlexander Duyck static int fib_insert_node(struct trie *t, struct key_vector *tp, 1033d5d6487cSAlexander Duyck struct fib_alias *new, t_key key) 103419baf839SRobert Olsson { 103535c6edacSAlexander Duyck struct key_vector *n, *l; 1036836a0123SAlexander Duyck 1037d5d6487cSAlexander Duyck l = leaf_new(key, new); 103879e5ad2cSAlexander Duyck if (!l) 10398d8e810cSAlexander Duyck goto noleaf; 1040d5d6487cSAlexander Duyck 1041d5d6487cSAlexander Duyck /* retrieve child from parent node */ 1042754baf8dSAlexander Duyck n = get_child(tp, get_index(key, tp)); 104319baf839SRobert Olsson 1044836a0123SAlexander Duyck /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1045836a0123SAlexander Duyck * 104619baf839SRobert Olsson * Add a new tnode here 104719baf839SRobert Olsson * first tnode need some special handling 1048836a0123SAlexander Duyck * leaves us in position for handling as case 3 104919baf839SRobert Olsson */ 105019baf839SRobert Olsson if (n) { 105135c6edacSAlexander Duyck struct key_vector *tn; 1052f835e471SRobert Olsson 1053e9b44019SAlexander Duyck tn = tnode_new(key, __fls(key ^ n->key), 1); 10548d8e810cSAlexander Duyck if (!tn) 10558d8e810cSAlexander Duyck goto notnode; 105619baf839SRobert Olsson 1057836a0123SAlexander Duyck /* initialize routes out of node */ 1058836a0123SAlexander Duyck NODE_INIT_PARENT(tn, tp); 1059836a0123SAlexander Duyck put_child(tn, get_index(key, tn) ^ 1, n); 106019baf839SRobert Olsson 1061836a0123SAlexander Duyck /* start adding routes into the node */ 106288bae714SAlexander Duyck put_child_root(tp, key, tn); 1063836a0123SAlexander Duyck node_set_parent(n, tn); 106419baf839SRobert Olsson 1065836a0123SAlexander Duyck /* parent now has a NULL spot where the leaf can go */ 1066e962f302SAlexander Duyck tp = tn; 106719baf839SRobert Olsson } 106891b9a277SOlof Johansson 1069836a0123SAlexander Duyck /* Case 3: n is NULL, and will just insert a new leaf */ 1070836a0123SAlexander Duyck NODE_INIT_PARENT(l, tp); 107188bae714SAlexander Duyck put_child_root(tp, key, l); 10727b85576dSJarek Poplawski trie_rebalance(t, tp); 1073d5d6487cSAlexander Duyck 1074d5d6487cSAlexander Duyck return 0; 10758d8e810cSAlexander Duyck notnode: 10768d8e810cSAlexander Duyck node_free(l); 10778d8e810cSAlexander Duyck noleaf: 10788d8e810cSAlexander Duyck return -ENOMEM; 1079d5d6487cSAlexander Duyck } 1080d5d6487cSAlexander Duyck 108135c6edacSAlexander Duyck static int fib_insert_alias(struct trie *t, struct key_vector *tp, 108235c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *new, 1083d5d6487cSAlexander Duyck struct fib_alias *fa, t_key key) 1084d5d6487cSAlexander Duyck { 1085d5d6487cSAlexander Duyck if (!l) 1086d5d6487cSAlexander Duyck return fib_insert_node(t, tp, new, key); 1087d5d6487cSAlexander Duyck 1088d5d6487cSAlexander Duyck if (fa) { 1089d5d6487cSAlexander Duyck hlist_add_before_rcu(&new->fa_list, &fa->fa_list); 1090836a0123SAlexander Duyck } else { 1091d5d6487cSAlexander Duyck struct fib_alias *last; 1092d5d6487cSAlexander Duyck 1093d5d6487cSAlexander Duyck hlist_for_each_entry(last, &l->leaf, fa_list) { 1094d5d6487cSAlexander Duyck if (new->fa_slen < last->fa_slen) 1095d5d6487cSAlexander Duyck break; 10960b65bd97SAlexander Duyck if ((new->fa_slen == last->fa_slen) && 10970b65bd97SAlexander Duyck (new->tb_id > last->tb_id)) 10980b65bd97SAlexander Duyck break; 1099d5d6487cSAlexander Duyck fa = last; 1100836a0123SAlexander Duyck } 1101836a0123SAlexander Duyck 1102d5d6487cSAlexander Duyck if (fa) 1103d5d6487cSAlexander Duyck hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); 1104d5d6487cSAlexander Duyck else 1105d5d6487cSAlexander Duyck hlist_add_head_rcu(&new->fa_list, &l->leaf); 110619baf839SRobert Olsson } 110719baf839SRobert Olsson 1108d5d6487cSAlexander Duyck /* if we added to the tail node then we need to update slen */ 1109d5d6487cSAlexander Duyck if (l->slen < new->fa_slen) { 1110d5d6487cSAlexander Duyck l->slen = new->fa_slen; 1111d5d6487cSAlexander Duyck leaf_push_suffix(tp, l); 1112d5d6487cSAlexander Duyck } 1113d5d6487cSAlexander Duyck 1114d5d6487cSAlexander Duyck return 0; 1115d5d6487cSAlexander Duyck } 1116d5d6487cSAlexander Duyck 1117d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1118b90eb754SJiri Pirko int fib_table_insert(struct net *net, struct fib_table *tb, 1119b90eb754SJiri Pirko struct fib_config *cfg) 112019baf839SRobert Olsson { 112119baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 112219baf839SRobert Olsson struct fib_alias *fa, *new_fa; 112335c6edacSAlexander Duyck struct key_vector *l, *tp; 1124b93e1fa7SGuillaume Nault u16 nlflags = NLM_F_EXCL; 112519baf839SRobert Olsson struct fib_info *fi; 112679e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 112779e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 11284e902c57SThomas Graf u8 tos = cfg->fc_tos; 1129d4a975e8SAlexander Duyck u32 key; 113019baf839SRobert Olsson int err; 113119baf839SRobert Olsson 11325786ec60SAlexander Duyck if (plen > KEYLENGTH) 113319baf839SRobert Olsson return -EINVAL; 113419baf839SRobert Olsson 11354e902c57SThomas Graf key = ntohl(cfg->fc_dst); 113619baf839SRobert Olsson 11372dfe55b4SPatrick McHardy pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); 113819baf839SRobert Olsson 1139d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 114019baf839SRobert Olsson return -EINVAL; 114119baf839SRobert Olsson 11424e902c57SThomas Graf fi = fib_create_info(cfg); 11434e902c57SThomas Graf if (IS_ERR(fi)) { 11444e902c57SThomas Graf err = PTR_ERR(fi); 114519baf839SRobert Olsson goto err; 11464e902c57SThomas Graf } 114719baf839SRobert Olsson 1148d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 11490b65bd97SAlexander Duyck fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, 11500b65bd97SAlexander Duyck tb->tb_id) : NULL; 115119baf839SRobert Olsson 115219baf839SRobert Olsson /* Now fa, if non-NULL, points to the first fib alias 115319baf839SRobert Olsson * with the same keys [prefix,tos,priority], if such key already 115419baf839SRobert Olsson * exists or to the node before which we will insert new one. 115519baf839SRobert Olsson * 115619baf839SRobert Olsson * If fa is NULL, we will need to allocate a new one and 115756315f9eSAlexander Duyck * insert to the tail of the section matching the suffix length 115856315f9eSAlexander Duyck * of the new alias. 115919baf839SRobert Olsson */ 116019baf839SRobert Olsson 1161936f6f8eSJulian Anastasov if (fa && fa->fa_tos == tos && 1162936f6f8eSJulian Anastasov fa->fa_info->fib_priority == fi->fib_priority) { 1163936f6f8eSJulian Anastasov struct fib_alias *fa_first, *fa_match; 116419baf839SRobert Olsson 116519baf839SRobert Olsson err = -EEXIST; 11664e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_EXCL) 116719baf839SRobert Olsson goto out; 116819baf839SRobert Olsson 1169b93e1fa7SGuillaume Nault nlflags &= ~NLM_F_EXCL; 1170b93e1fa7SGuillaume Nault 1171936f6f8eSJulian Anastasov /* We have 2 goals: 1172936f6f8eSJulian Anastasov * 1. Find exact match for type, scope, fib_info to avoid 1173936f6f8eSJulian Anastasov * duplicate routes 1174936f6f8eSJulian Anastasov * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it 1175936f6f8eSJulian Anastasov */ 1176936f6f8eSJulian Anastasov fa_match = NULL; 1177936f6f8eSJulian Anastasov fa_first = fa; 117856315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 11790b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 11800b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 11810b65bd97SAlexander Duyck (fa->fa_tos != tos)) 1182936f6f8eSJulian Anastasov break; 1183936f6f8eSJulian Anastasov if (fa->fa_info->fib_priority != fi->fib_priority) 1184936f6f8eSJulian Anastasov break; 1185936f6f8eSJulian Anastasov if (fa->fa_type == cfg->fc_type && 1186936f6f8eSJulian Anastasov fa->fa_info == fi) { 1187936f6f8eSJulian Anastasov fa_match = fa; 1188936f6f8eSJulian Anastasov break; 1189936f6f8eSJulian Anastasov } 1190936f6f8eSJulian Anastasov } 1191936f6f8eSJulian Anastasov 11924e902c57SThomas Graf if (cfg->fc_nlflags & NLM_F_REPLACE) { 119319baf839SRobert Olsson struct fib_info *fi_drop; 119419baf839SRobert Olsson u8 state; 119519baf839SRobert Olsson 1196b93e1fa7SGuillaume Nault nlflags |= NLM_F_REPLACE; 1197936f6f8eSJulian Anastasov fa = fa_first; 1198936f6f8eSJulian Anastasov if (fa_match) { 1199936f6f8eSJulian Anastasov if (fa == fa_match) 1200936f6f8eSJulian Anastasov err = 0; 12016725033fSJoonwoo Park goto out; 1202936f6f8eSJulian Anastasov } 12032373ce1cSRobert Olsson err = -ENOBUFS; 1204e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 120551456b29SIan Morris if (!new_fa) 12062373ce1cSRobert Olsson goto out; 120719baf839SRobert Olsson 120819baf839SRobert Olsson fi_drop = fa->fa_info; 12092373ce1cSRobert Olsson new_fa->fa_tos = fa->fa_tos; 12102373ce1cSRobert Olsson new_fa->fa_info = fi; 12114e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 121219baf839SRobert Olsson state = fa->fa_state; 1213936f6f8eSJulian Anastasov new_fa->fa_state = state & ~FA_S_ACCESSED; 12149b6ebad5SAlexander Duyck new_fa->fa_slen = fa->fa_slen; 1215d4e64c29SMichal Kubeček new_fa->tb_id = tb->tb_id; 12162392debcSJulian Anastasov new_fa->fa_default = -1; 121719baf839SRobert Olsson 1218ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, 12198e05fd71SScott Feldman new_fa->fa_tos, 12208e05fd71SScott Feldman cfg->fc_type, 1221f8f21471SScott Feldman cfg->fc_nlflags, 12228e05fd71SScott Feldman tb->tb_id); 12238e05fd71SScott Feldman if (err) { 1224ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 12258e05fd71SScott Feldman kmem_cache_free(fn_alias_kmem, new_fa); 12268e05fd71SScott Feldman goto out; 12278e05fd71SScott Feldman } 12288e05fd71SScott Feldman 122956315f9eSAlexander Duyck hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); 12308e05fd71SScott Feldman 12312373ce1cSRobert Olsson alias_free_mem_rcu(fa); 123219baf839SRobert Olsson 123319baf839SRobert Olsson fib_release_info(fi_drop); 123419baf839SRobert Olsson if (state & FA_S_ACCESSED) 12354ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1236b90eb754SJiri Pirko 1237b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, 1238b90eb754SJiri Pirko key, plen, fi, 1239b90eb754SJiri Pirko new_fa->fa_tos, cfg->fc_type, 1240b90eb754SJiri Pirko tb->tb_id, cfg->fc_nlflags); 1241b8f55831SMilan Kocian rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1242b93e1fa7SGuillaume Nault tb->tb_id, &cfg->fc_nlinfo, nlflags); 124319baf839SRobert Olsson 124419baf839SRobert Olsson goto succeeded; 124519baf839SRobert Olsson } 124619baf839SRobert Olsson /* Error if we find a perfect match which 124719baf839SRobert Olsson * uses the same scope, type, and nexthop 124819baf839SRobert Olsson * information. 124919baf839SRobert Olsson */ 1250936f6f8eSJulian Anastasov if (fa_match) 125119baf839SRobert Olsson goto out; 1252a07f5f50SStephen Hemminger 1253a2bb6d7dSRoopa Prabhu if (cfg->fc_nlflags & NLM_F_APPEND) 1254b93e1fa7SGuillaume Nault nlflags |= NLM_F_APPEND; 1255a2bb6d7dSRoopa Prabhu else 1256936f6f8eSJulian Anastasov fa = fa_first; 125719baf839SRobert Olsson } 125819baf839SRobert Olsson err = -ENOENT; 12594e902c57SThomas Graf if (!(cfg->fc_nlflags & NLM_F_CREATE)) 126019baf839SRobert Olsson goto out; 126119baf839SRobert Olsson 1262b93e1fa7SGuillaume Nault nlflags |= NLM_F_CREATE; 126319baf839SRobert Olsson err = -ENOBUFS; 1264e94b1766SChristoph Lameter new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 126551456b29SIan Morris if (!new_fa) 126619baf839SRobert Olsson goto out; 126719baf839SRobert Olsson 126819baf839SRobert Olsson new_fa->fa_info = fi; 126919baf839SRobert Olsson new_fa->fa_tos = tos; 12704e902c57SThomas Graf new_fa->fa_type = cfg->fc_type; 127119baf839SRobert Olsson new_fa->fa_state = 0; 127279e5ad2cSAlexander Duyck new_fa->fa_slen = slen; 12730ddcf43dSAlexander Duyck new_fa->tb_id = tb->tb_id; 12742392debcSJulian Anastasov new_fa->fa_default = -1; 127519baf839SRobert Olsson 12768e05fd71SScott Feldman /* (Optionally) offload fib entry to switch hardware. */ 1277ebb9a03aSJiri Pirko err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, 1278ebb9a03aSJiri Pirko cfg->fc_nlflags, tb->tb_id); 12798e05fd71SScott Feldman if (err) { 1280ebb9a03aSJiri Pirko switchdev_fib_ipv4_abort(fi); 12818e05fd71SScott Feldman goto out_free_new_fa; 12828e05fd71SScott Feldman } 12838e05fd71SScott Feldman 12849b6ebad5SAlexander Duyck /* Insert new entry to the list. */ 1285d5d6487cSAlexander Duyck err = fib_insert_alias(t, tp, l, new_fa, fa, key); 1286d5d6487cSAlexander Duyck if (err) 12878e05fd71SScott Feldman goto out_sw_fib_del; 128819baf839SRobert Olsson 128921d8c49eSDavid S. Miller if (!plen) 129021d8c49eSDavid S. Miller tb->tb_num_default++; 129121d8c49eSDavid S. Miller 12924ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 1293b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, 1294b90eb754SJiri Pirko cfg->fc_type, tb->tb_id, cfg->fc_nlflags); 12950ddcf43dSAlexander Duyck rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1296a2bb6d7dSRoopa Prabhu &cfg->fc_nlinfo, nlflags); 129719baf839SRobert Olsson succeeded: 129819baf839SRobert Olsson return 0; 1299f835e471SRobert Olsson 13008e05fd71SScott Feldman out_sw_fib_del: 1301ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); 1302f835e471SRobert Olsson out_free_new_fa: 1303f835e471SRobert Olsson kmem_cache_free(fn_alias_kmem, new_fa); 130419baf839SRobert Olsson out: 130519baf839SRobert Olsson fib_release_info(fi); 130691b9a277SOlof Johansson err: 130719baf839SRobert Olsson return err; 130819baf839SRobert Olsson } 130919baf839SRobert Olsson 131035c6edacSAlexander Duyck static inline t_key prefix_mismatch(t_key key, struct key_vector *n) 13119f9e636dSAlexander Duyck { 13129f9e636dSAlexander Duyck t_key prefix = n->key; 13139f9e636dSAlexander Duyck 13149f9e636dSAlexander Duyck return (key ^ prefix) & (prefix | -prefix); 13159f9e636dSAlexander Duyck } 13169f9e636dSAlexander Duyck 1317345e9b54SAlexander Duyck /* should be called with rcu_read_lock */ 131822bd5b9bSDavid S. Miller int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1319ebc0ffaeSEric Dumazet struct fib_result *res, int fib_flags) 132019baf839SRobert Olsson { 132119baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 13228274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 13238274a97aSAlexander Duyck struct trie_use_stats __percpu *stats = t->stats; 13248274a97aSAlexander Duyck #endif 13259f9e636dSAlexander Duyck const t_key key = ntohl(flp->daddr); 132635c6edacSAlexander Duyck struct key_vector *n, *pn; 132779e5ad2cSAlexander Duyck struct fib_alias *fa; 132871e8b67dSAlexander Duyck unsigned long index; 13299f9e636dSAlexander Duyck t_key cindex; 133019baf839SRobert Olsson 1331f6d3c192SDavid Ahern trace_fib_table_lookup(tb->tb_id, flp); 1332f6d3c192SDavid Ahern 133388bae714SAlexander Duyck pn = t->kv; 133488bae714SAlexander Duyck cindex = 0; 133588bae714SAlexander Duyck 133688bae714SAlexander Duyck n = get_child_rcu(pn, cindex); 133719baf839SRobert Olsson if (!n) 1338345e9b54SAlexander Duyck return -EAGAIN; 133919baf839SRobert Olsson 134019baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 13418274a97aSAlexander Duyck this_cpu_inc(stats->gets); 134219baf839SRobert Olsson #endif 134319baf839SRobert Olsson 13449f9e636dSAlexander Duyck /* Step 1: Travel to the longest prefix match in the trie */ 13459f9e636dSAlexander Duyck for (;;) { 134688bae714SAlexander Duyck index = get_cindex(key, n); 13479f9e636dSAlexander Duyck 13489f9e636dSAlexander Duyck /* This bit of code is a bit tricky but it combines multiple 13499f9e636dSAlexander Duyck * checks into a single check. The prefix consists of the 13509f9e636dSAlexander Duyck * prefix plus zeros for the "bits" in the prefix. The index 13519f9e636dSAlexander Duyck * is the difference between the key and this value. From 13529f9e636dSAlexander Duyck * this we can actually derive several pieces of data. 135371e8b67dSAlexander Duyck * if (index >= (1ul << bits)) 13549f9e636dSAlexander Duyck * we have a mismatch in skip bits and failed 1355b3832117SAlexander Duyck * else 1356b3832117SAlexander Duyck * we know the value is cindex 135771e8b67dSAlexander Duyck * 135871e8b67dSAlexander Duyck * This check is safe even if bits == KEYLENGTH due to the 135971e8b67dSAlexander Duyck * fact that we can only allocate a node with 32 bits if a 136071e8b67dSAlexander Duyck * long is greater than 32 bits. 13619f9e636dSAlexander Duyck */ 136271e8b67dSAlexander Duyck if (index >= (1ul << n->bits)) 13639f9e636dSAlexander Duyck break; 13649f9e636dSAlexander Duyck 13659f9e636dSAlexander Duyck /* we have found a leaf. Prefixes have already been compared */ 13669f9e636dSAlexander Duyck if (IS_LEAF(n)) 1367a07f5f50SStephen Hemminger goto found; 13689f9e636dSAlexander Duyck 13699f9e636dSAlexander Duyck /* only record pn and cindex if we are going to be chopping 13709f9e636dSAlexander Duyck * bits later. Otherwise we are just wasting cycles. 13719f9e636dSAlexander Duyck */ 13725405afd1SAlexander Duyck if (n->slen > n->pos) { 13739f9e636dSAlexander Duyck pn = n; 13749f9e636dSAlexander Duyck cindex = index; 137519baf839SRobert Olsson } 1376a07f5f50SStephen Hemminger 1377754baf8dSAlexander Duyck n = get_child_rcu(n, index); 13789f9e636dSAlexander Duyck if (unlikely(!n)) 13799f9e636dSAlexander Duyck goto backtrace; 13809f9e636dSAlexander Duyck } 138119baf839SRobert Olsson 13829f9e636dSAlexander Duyck /* Step 2: Sort out leaves and begin backtracing for longest prefix */ 13839f9e636dSAlexander Duyck for (;;) { 13849f9e636dSAlexander Duyck /* record the pointer where our next node pointer is stored */ 138535c6edacSAlexander Duyck struct key_vector __rcu **cptr = n->tnode; 138619baf839SRobert Olsson 13879f9e636dSAlexander Duyck /* This test verifies that none of the bits that differ 13889f9e636dSAlexander Duyck * between the key and the prefix exist in the region of 13899f9e636dSAlexander Duyck * the lsb and higher in the prefix. 13909f9e636dSAlexander Duyck */ 13915405afd1SAlexander Duyck if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) 13929f9e636dSAlexander Duyck goto backtrace; 139319baf839SRobert Olsson 13949f9e636dSAlexander Duyck /* exit out and process leaf */ 13959f9e636dSAlexander Duyck if (unlikely(IS_LEAF(n))) 13969f9e636dSAlexander Duyck break; 139719baf839SRobert Olsson 13989f9e636dSAlexander Duyck /* Don't bother recording parent info. Since we are in 13999f9e636dSAlexander Duyck * prefix match mode we will have to come back to wherever 14009f9e636dSAlexander Duyck * we started this traversal anyway 14019f9e636dSAlexander Duyck */ 14029f9e636dSAlexander Duyck 14039f9e636dSAlexander Duyck while ((n = rcu_dereference(*cptr)) == NULL) { 14049f9e636dSAlexander Duyck backtrace: 140519baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14069f9e636dSAlexander Duyck if (!n) 14078274a97aSAlexander Duyck this_cpu_inc(stats->null_node_hit); 140819baf839SRobert Olsson #endif 14099f9e636dSAlexander Duyck /* If we are at cindex 0 there are no more bits for 14109f9e636dSAlexander Duyck * us to strip at this level so we must ascend back 14119f9e636dSAlexander Duyck * up one level to see if there are any more bits to 14129f9e636dSAlexander Duyck * be stripped there. 141319baf839SRobert Olsson */ 14149f9e636dSAlexander Duyck while (!cindex) { 14159f9e636dSAlexander Duyck t_key pkey = pn->key; 141619baf839SRobert Olsson 141788bae714SAlexander Duyck /* If we don't have a parent then there is 141888bae714SAlexander Duyck * nothing for us to do as we do not have any 141988bae714SAlexander Duyck * further nodes to parse. 142088bae714SAlexander Duyck */ 142188bae714SAlexander Duyck if (IS_TRIE(pn)) 1422345e9b54SAlexander Duyck return -EAGAIN; 142319baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 14248274a97aSAlexander Duyck this_cpu_inc(stats->backtrack); 142519baf839SRobert Olsson #endif 14269f9e636dSAlexander Duyck /* Get Child's index */ 142788bae714SAlexander Duyck pn = node_parent_rcu(pn); 14289f9e636dSAlexander Duyck cindex = get_index(pkey, pn); 14299f9e636dSAlexander Duyck } 14309f9e636dSAlexander Duyck 14319f9e636dSAlexander Duyck /* strip the least significant bit from the cindex */ 14329f9e636dSAlexander Duyck cindex &= cindex - 1; 14339f9e636dSAlexander Duyck 14349f9e636dSAlexander Duyck /* grab pointer for next child node */ 143541b489fdSAlexander Duyck cptr = &pn->tnode[cindex]; 143619baf839SRobert Olsson } 143719baf839SRobert Olsson } 14389f9e636dSAlexander Duyck 143919baf839SRobert Olsson found: 144071e8b67dSAlexander Duyck /* this line carries forward the xor from earlier in the function */ 144171e8b67dSAlexander Duyck index = key ^ n->key; 144271e8b67dSAlexander Duyck 14439f9e636dSAlexander Duyck /* Step 3: Process the leaf, if that fails fall back to backtracing */ 144479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 1445345e9b54SAlexander Duyck struct fib_info *fi = fa->fa_info; 1446345e9b54SAlexander Duyck int nhsel, err; 1447345e9b54SAlexander Duyck 1448a5829f53SAlexander Duyck if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { 1449a5829f53SAlexander Duyck if (index >= (1ul << fa->fa_slen)) 14509b6ebad5SAlexander Duyck continue; 1451a5829f53SAlexander Duyck } 1452345e9b54SAlexander Duyck if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1453345e9b54SAlexander Duyck continue; 1454345e9b54SAlexander Duyck if (fi->fib_dead) 1455345e9b54SAlexander Duyck continue; 1456345e9b54SAlexander Duyck if (fa->fa_info->fib_scope < flp->flowi4_scope) 1457345e9b54SAlexander Duyck continue; 1458345e9b54SAlexander Duyck fib_alias_accessed(fa); 1459345e9b54SAlexander Duyck err = fib_props[fa->fa_type].error; 1460345e9b54SAlexander Duyck if (unlikely(err < 0)) { 1461345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1462345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1463345e9b54SAlexander Duyck #endif 1464345e9b54SAlexander Duyck return err; 1465345e9b54SAlexander Duyck } 1466345e9b54SAlexander Duyck if (fi->fib_flags & RTNH_F_DEAD) 1467345e9b54SAlexander Duyck continue; 1468345e9b54SAlexander Duyck for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1469345e9b54SAlexander Duyck const struct fib_nh *nh = &fi->fib_nh[nhsel]; 14700eeb075fSAndy Gospodarek struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); 1471345e9b54SAlexander Duyck 1472345e9b54SAlexander Duyck if (nh->nh_flags & RTNH_F_DEAD) 1473345e9b54SAlexander Duyck continue; 14740eeb075fSAndy Gospodarek if (in_dev && 14750eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 14760eeb075fSAndy Gospodarek nh->nh_flags & RTNH_F_LINKDOWN && 14770eeb075fSAndy Gospodarek !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 14780eeb075fSAndy Gospodarek continue; 147958189ca7SDavid Ahern if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { 1480613d09b3SDavid Ahern if (flp->flowi4_oif && 1481613d09b3SDavid Ahern flp->flowi4_oif != nh->nh_oif) 1482345e9b54SAlexander Duyck continue; 1483613d09b3SDavid Ahern } 1484345e9b54SAlexander Duyck 1485345e9b54SAlexander Duyck if (!(fib_flags & FIB_LOOKUP_NOREF)) 1486345e9b54SAlexander Duyck atomic_inc(&fi->fib_clntref); 1487345e9b54SAlexander Duyck 14889b6ebad5SAlexander Duyck res->prefixlen = KEYLENGTH - fa->fa_slen; 1489345e9b54SAlexander Duyck res->nh_sel = nhsel; 1490345e9b54SAlexander Duyck res->type = fa->fa_type; 1491345e9b54SAlexander Duyck res->scope = fi->fib_scope; 1492345e9b54SAlexander Duyck res->fi = fi; 1493345e9b54SAlexander Duyck res->table = tb; 149479e5ad2cSAlexander Duyck res->fa_head = &n->leaf; 1495345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1496345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_passed); 1497345e9b54SAlexander Duyck #endif 1498f6d3c192SDavid Ahern trace_fib_table_lookup_nh(nh); 1499f6d3c192SDavid Ahern 1500345e9b54SAlexander Duyck return err; 1501345e9b54SAlexander Duyck } 1502345e9b54SAlexander Duyck } 1503345e9b54SAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 1504345e9b54SAlexander Duyck this_cpu_inc(stats->semantic_match_miss); 1505345e9b54SAlexander Duyck #endif 15069f9e636dSAlexander Duyck goto backtrace; 150719baf839SRobert Olsson } 15086fc01438SFlorian Westphal EXPORT_SYMBOL_GPL(fib_table_lookup); 150919baf839SRobert Olsson 151035c6edacSAlexander Duyck static void fib_remove_alias(struct trie *t, struct key_vector *tp, 151135c6edacSAlexander Duyck struct key_vector *l, struct fib_alias *old) 1512d5d6487cSAlexander Duyck { 1513d5d6487cSAlexander Duyck /* record the location of the previous list_info entry */ 1514d5d6487cSAlexander Duyck struct hlist_node **pprev = old->fa_list.pprev; 1515d5d6487cSAlexander Duyck struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); 1516d5d6487cSAlexander Duyck 1517d5d6487cSAlexander Duyck /* remove the fib_alias from the list */ 1518d5d6487cSAlexander Duyck hlist_del_rcu(&old->fa_list); 1519d5d6487cSAlexander Duyck 1520d5d6487cSAlexander Duyck /* if we emptied the list this leaf will be freed and we can sort 1521d5d6487cSAlexander Duyck * out parent suffix lengths as a part of trie_rebalance 1522d562f1f8SRobert Olsson */ 1523d5d6487cSAlexander Duyck if (hlist_empty(&l->leaf)) { 152488bae714SAlexander Duyck put_child_root(tp, l->key, NULL); 1525d5d6487cSAlexander Duyck node_free(l); 1526d5d6487cSAlexander Duyck trie_rebalance(t, tp); 1527d5d6487cSAlexander Duyck return; 1528d5d6487cSAlexander Duyck } 1529d5d6487cSAlexander Duyck 1530d5d6487cSAlexander Duyck /* only access fa if it is pointing at the last valid hlist_node */ 1531d5d6487cSAlexander Duyck if (*pprev) 1532d5d6487cSAlexander Duyck return; 1533d5d6487cSAlexander Duyck 1534d5d6487cSAlexander Duyck /* update the trie with the latest suffix length */ 1535d5d6487cSAlexander Duyck l->slen = fa->fa_slen; 1536d5d6487cSAlexander Duyck leaf_pull_suffix(tp, l); 1537d5d6487cSAlexander Duyck } 1538d5d6487cSAlexander Duyck 1539d5d6487cSAlexander Duyck /* Caller must hold RTNL. */ 1540b90eb754SJiri Pirko int fib_table_delete(struct net *net, struct fib_table *tb, 1541b90eb754SJiri Pirko struct fib_config *cfg) 154219baf839SRobert Olsson { 154319baf839SRobert Olsson struct trie *t = (struct trie *) tb->tb_data; 154419baf839SRobert Olsson struct fib_alias *fa, *fa_to_delete; 154535c6edacSAlexander Duyck struct key_vector *l, *tp; 154679e5ad2cSAlexander Duyck u8 plen = cfg->fc_dst_len; 154779e5ad2cSAlexander Duyck u8 slen = KEYLENGTH - plen; 1548d4a975e8SAlexander Duyck u8 tos = cfg->fc_tos; 1549d4a975e8SAlexander Duyck u32 key; 155091b9a277SOlof Johansson 155179e5ad2cSAlexander Duyck if (plen > KEYLENGTH) 155219baf839SRobert Olsson return -EINVAL; 155319baf839SRobert Olsson 15544e902c57SThomas Graf key = ntohl(cfg->fc_dst); 155519baf839SRobert Olsson 1556d4a975e8SAlexander Duyck if ((plen < KEYLENGTH) && (key << plen)) 155719baf839SRobert Olsson return -EINVAL; 155819baf839SRobert Olsson 1559d4a975e8SAlexander Duyck l = fib_find_node(t, &tp, key); 156019baf839SRobert Olsson if (!l) 156119baf839SRobert Olsson return -ESRCH; 156219baf839SRobert Olsson 15630b65bd97SAlexander Duyck fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); 156419baf839SRobert Olsson if (!fa) 156519baf839SRobert Olsson return -ESRCH; 156619baf839SRobert Olsson 15670c7770c7SStephen Hemminger pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 156819baf839SRobert Olsson 156919baf839SRobert Olsson fa_to_delete = NULL; 157056315f9eSAlexander Duyck hlist_for_each_entry_from(fa, fa_list) { 157119baf839SRobert Olsson struct fib_info *fi = fa->fa_info; 157219baf839SRobert Olsson 15730b65bd97SAlexander Duyck if ((fa->fa_slen != slen) || 15740b65bd97SAlexander Duyck (fa->tb_id != tb->tb_id) || 15750b65bd97SAlexander Duyck (fa->fa_tos != tos)) 157619baf839SRobert Olsson break; 157719baf839SRobert Olsson 15784e902c57SThomas Graf if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 15794e902c57SThomas Graf (cfg->fc_scope == RT_SCOPE_NOWHERE || 158037e826c5SDavid S. Miller fa->fa_info->fib_scope == cfg->fc_scope) && 158174cb3c10SJulian Anastasov (!cfg->fc_prefsrc || 158274cb3c10SJulian Anastasov fi->fib_prefsrc == cfg->fc_prefsrc) && 15834e902c57SThomas Graf (!cfg->fc_protocol || 15844e902c57SThomas Graf fi->fib_protocol == cfg->fc_protocol) && 15854e902c57SThomas Graf fib_nh_match(cfg, fi) == 0) { 158619baf839SRobert Olsson fa_to_delete = fa; 158719baf839SRobert Olsson break; 158819baf839SRobert Olsson } 158919baf839SRobert Olsson } 159019baf839SRobert Olsson 159191b9a277SOlof Johansson if (!fa_to_delete) 159291b9a277SOlof Johansson return -ESRCH; 159319baf839SRobert Olsson 1594ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, 15958e05fd71SScott Feldman cfg->fc_type, tb->tb_id); 15968e05fd71SScott Feldman 1597b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, 1598b90eb754SJiri Pirko fa_to_delete->fa_info, tos, cfg->fc_type, 1599b90eb754SJiri Pirko tb->tb_id, 0); 1600d5d6487cSAlexander Duyck rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1601b8f55831SMilan Kocian &cfg->fc_nlinfo, 0); 160219baf839SRobert Olsson 160321d8c49eSDavid S. Miller if (!plen) 160421d8c49eSDavid S. Miller tb->tb_num_default--; 160521d8c49eSDavid S. Miller 1606d5d6487cSAlexander Duyck fib_remove_alias(t, tp, l, fa_to_delete); 16077289e6ddSAlexander Duyck 1608d5d6487cSAlexander Duyck if (fa_to_delete->fa_state & FA_S_ACCESSED) 16094ccfe6d4SNicolas Dichtel rt_cache_flush(cfg->fc_nlinfo.nl_net); 161019baf839SRobert Olsson 1611d5d6487cSAlexander Duyck fib_release_info(fa_to_delete->fa_info); 1612d5d6487cSAlexander Duyck alias_free_mem_rcu(fa_to_delete); 161319baf839SRobert Olsson return 0; 161419baf839SRobert Olsson } 161519baf839SRobert Olsson 16168be33e95SAlexander Duyck /* Scan for the next leaf starting at the provided key value */ 161735c6edacSAlexander Duyck static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) 161819baf839SRobert Olsson { 161935c6edacSAlexander Duyck struct key_vector *pn, *n = *tn; 16208be33e95SAlexander Duyck unsigned long cindex; 162119baf839SRobert Olsson 16228be33e95SAlexander Duyck /* this loop is meant to try and find the key in the trie */ 162388bae714SAlexander Duyck do { 162488bae714SAlexander Duyck /* record parent and next child index */ 162588bae714SAlexander Duyck pn = n; 1626c2229fe1SAlexander Duyck cindex = (key > pn->key) ? get_index(key, pn) : 0; 162788bae714SAlexander Duyck 162888bae714SAlexander Duyck if (cindex >> pn->bits) 162988bae714SAlexander Duyck break; 163088bae714SAlexander Duyck 163188bae714SAlexander Duyck /* descend into the next child */ 163288bae714SAlexander Duyck n = get_child_rcu(pn, cindex++); 163388bae714SAlexander Duyck if (!n) 163488bae714SAlexander Duyck break; 16358be33e95SAlexander Duyck 16368be33e95SAlexander Duyck /* guarantee forward progress on the keys */ 16378be33e95SAlexander Duyck if (IS_LEAF(n) && (n->key >= key)) 16388be33e95SAlexander Duyck goto found; 163988bae714SAlexander Duyck } while (IS_TNODE(n)); 16408be33e95SAlexander Duyck 16418be33e95SAlexander Duyck /* this loop will search for the next leaf with a greater key */ 164288bae714SAlexander Duyck while (!IS_TRIE(pn)) { 16438be33e95SAlexander Duyck /* if we exhausted the parent node we will need to climb */ 16448be33e95SAlexander Duyck if (cindex >= (1ul << pn->bits)) { 16458be33e95SAlexander Duyck t_key pkey = pn->key; 16468be33e95SAlexander Duyck 16478be33e95SAlexander Duyck pn = node_parent_rcu(pn); 16488be33e95SAlexander Duyck cindex = get_index(pkey, pn) + 1; 16498be33e95SAlexander Duyck continue; 16508be33e95SAlexander Duyck } 16518be33e95SAlexander Duyck 16528be33e95SAlexander Duyck /* grab the next available node */ 1653754baf8dSAlexander Duyck n = get_child_rcu(pn, cindex++); 16548be33e95SAlexander Duyck if (!n) 165591b9a277SOlof Johansson continue; 165619baf839SRobert Olsson 16578be33e95SAlexander Duyck /* no need to compare keys since we bumped the index */ 16588be33e95SAlexander Duyck if (IS_LEAF(n)) 16598be33e95SAlexander Duyck goto found; 166082cfbb00SStephen Hemminger 166182cfbb00SStephen Hemminger /* Rescan start scanning in new node */ 16628be33e95SAlexander Duyck pn = n; 16638be33e95SAlexander Duyck cindex = 0; 166419baf839SRobert Olsson } 166582cfbb00SStephen Hemminger 16668be33e95SAlexander Duyck *tn = pn; 166782cfbb00SStephen Hemminger return NULL; /* Root of trie */ 16688be33e95SAlexander Duyck found: 16698be33e95SAlexander Duyck /* if we are at the limit for keys just return NULL for the tnode */ 167088bae714SAlexander Duyck *tn = pn; 1671adaf9816SAlexander Duyck return n; 167282cfbb00SStephen Hemminger } 167382cfbb00SStephen Hemminger 16740ddcf43dSAlexander Duyck static void fib_trie_free(struct fib_table *tb) 16750ddcf43dSAlexander Duyck { 16760ddcf43dSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 16770ddcf43dSAlexander Duyck struct key_vector *pn = t->kv; 16780ddcf43dSAlexander Duyck unsigned long cindex = 1; 16790ddcf43dSAlexander Duyck struct hlist_node *tmp; 16800ddcf43dSAlexander Duyck struct fib_alias *fa; 16810ddcf43dSAlexander Duyck 16820ddcf43dSAlexander Duyck /* walk trie in reverse order and free everything */ 16830ddcf43dSAlexander Duyck for (;;) { 16840ddcf43dSAlexander Duyck struct key_vector *n; 16850ddcf43dSAlexander Duyck 16860ddcf43dSAlexander Duyck if (!(cindex--)) { 16870ddcf43dSAlexander Duyck t_key pkey = pn->key; 16880ddcf43dSAlexander Duyck 16890ddcf43dSAlexander Duyck if (IS_TRIE(pn)) 16900ddcf43dSAlexander Duyck break; 16910ddcf43dSAlexander Duyck 16920ddcf43dSAlexander Duyck n = pn; 16930ddcf43dSAlexander Duyck pn = node_parent(pn); 16940ddcf43dSAlexander Duyck 16950ddcf43dSAlexander Duyck /* drop emptied tnode */ 16960ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 16970ddcf43dSAlexander Duyck node_free(n); 16980ddcf43dSAlexander Duyck 16990ddcf43dSAlexander Duyck cindex = get_index(pkey, pn); 17000ddcf43dSAlexander Duyck 17010ddcf43dSAlexander Duyck continue; 17020ddcf43dSAlexander Duyck } 17030ddcf43dSAlexander Duyck 17040ddcf43dSAlexander Duyck /* grab the next available node */ 17050ddcf43dSAlexander Duyck n = get_child(pn, cindex); 17060ddcf43dSAlexander Duyck if (!n) 17070ddcf43dSAlexander Duyck continue; 17080ddcf43dSAlexander Duyck 17090ddcf43dSAlexander Duyck if (IS_TNODE(n)) { 17100ddcf43dSAlexander Duyck /* record pn and cindex for leaf walking */ 17110ddcf43dSAlexander Duyck pn = n; 17120ddcf43dSAlexander Duyck cindex = 1ul << n->bits; 17130ddcf43dSAlexander Duyck 17140ddcf43dSAlexander Duyck continue; 17150ddcf43dSAlexander Duyck } 17160ddcf43dSAlexander Duyck 17170ddcf43dSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 17180ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 17190ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 17200ddcf43dSAlexander Duyck } 17210ddcf43dSAlexander Duyck 17220ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 17230ddcf43dSAlexander Duyck node_free(n); 17240ddcf43dSAlexander Duyck } 17250ddcf43dSAlexander Duyck 17260ddcf43dSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 17270ddcf43dSAlexander Duyck free_percpu(t->stats); 17280ddcf43dSAlexander Duyck #endif 17290ddcf43dSAlexander Duyck kfree(tb); 17300ddcf43dSAlexander Duyck } 17310ddcf43dSAlexander Duyck 17320ddcf43dSAlexander Duyck struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) 17330ddcf43dSAlexander Duyck { 17340ddcf43dSAlexander Duyck struct trie *ot = (struct trie *)oldtb->tb_data; 17350ddcf43dSAlexander Duyck struct key_vector *l, *tp = ot->kv; 17360ddcf43dSAlexander Duyck struct fib_table *local_tb; 17370ddcf43dSAlexander Duyck struct fib_alias *fa; 17380ddcf43dSAlexander Duyck struct trie *lt; 17390ddcf43dSAlexander Duyck t_key key = 0; 17400ddcf43dSAlexander Duyck 17410ddcf43dSAlexander Duyck if (oldtb->tb_data == oldtb->__data) 17420ddcf43dSAlexander Duyck return oldtb; 17430ddcf43dSAlexander Duyck 17440ddcf43dSAlexander Duyck local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); 17450ddcf43dSAlexander Duyck if (!local_tb) 17460ddcf43dSAlexander Duyck return NULL; 17470ddcf43dSAlexander Duyck 17480ddcf43dSAlexander Duyck lt = (struct trie *)local_tb->tb_data; 17490ddcf43dSAlexander Duyck 17500ddcf43dSAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 17510ddcf43dSAlexander Duyck struct key_vector *local_l = NULL, *local_tp; 17520ddcf43dSAlexander Duyck 17530ddcf43dSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 17540ddcf43dSAlexander Duyck struct fib_alias *new_fa; 17550ddcf43dSAlexander Duyck 17560ddcf43dSAlexander Duyck if (local_tb->tb_id != fa->tb_id) 17570ddcf43dSAlexander Duyck continue; 17580ddcf43dSAlexander Duyck 17590ddcf43dSAlexander Duyck /* clone fa for new local table */ 17600ddcf43dSAlexander Duyck new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 17610ddcf43dSAlexander Duyck if (!new_fa) 17620ddcf43dSAlexander Duyck goto out; 17630ddcf43dSAlexander Duyck 17640ddcf43dSAlexander Duyck memcpy(new_fa, fa, sizeof(*fa)); 17650ddcf43dSAlexander Duyck 17660ddcf43dSAlexander Duyck /* insert clone into table */ 17670ddcf43dSAlexander Duyck if (!local_l) 17680ddcf43dSAlexander Duyck local_l = fib_find_node(lt, &local_tp, l->key); 17690ddcf43dSAlexander Duyck 17700ddcf43dSAlexander Duyck if (fib_insert_alias(lt, local_tp, local_l, new_fa, 17710ddcf43dSAlexander Duyck NULL, l->key)) 17720ddcf43dSAlexander Duyck goto out; 17730ddcf43dSAlexander Duyck } 17740ddcf43dSAlexander Duyck 17750ddcf43dSAlexander Duyck /* stop loop if key wrapped back to 0 */ 17760ddcf43dSAlexander Duyck key = l->key + 1; 17770ddcf43dSAlexander Duyck if (key < l->key) 17780ddcf43dSAlexander Duyck break; 17790ddcf43dSAlexander Duyck } 17800ddcf43dSAlexander Duyck 17810ddcf43dSAlexander Duyck return local_tb; 17820ddcf43dSAlexander Duyck out: 17830ddcf43dSAlexander Duyck fib_trie_free(local_tb); 17840ddcf43dSAlexander Duyck 17850ddcf43dSAlexander Duyck return NULL; 17860ddcf43dSAlexander Duyck } 17870ddcf43dSAlexander Duyck 1788104616e7SScott Feldman /* Caller must hold RTNL */ 1789104616e7SScott Feldman void fib_table_flush_external(struct fib_table *tb) 1790104616e7SScott Feldman { 1791104616e7SScott Feldman struct trie *t = (struct trie *)tb->tb_data; 179288bae714SAlexander Duyck struct key_vector *pn = t->kv; 179388bae714SAlexander Duyck unsigned long cindex = 1; 179488bae714SAlexander Duyck struct hlist_node *tmp; 1795104616e7SScott Feldman struct fib_alias *fa; 1796104616e7SScott Feldman 1797104616e7SScott Feldman /* walk trie in reverse order */ 179888bae714SAlexander Duyck for (;;) { 17990ddcf43dSAlexander Duyck unsigned char slen = 0; 180088bae714SAlexander Duyck struct key_vector *n; 180188bae714SAlexander Duyck 180288bae714SAlexander Duyck if (!(cindex--)) { 1803104616e7SScott Feldman t_key pkey = pn->key; 1804104616e7SScott Feldman 180588bae714SAlexander Duyck /* cannot resize the trie vector */ 180688bae714SAlexander Duyck if (IS_TRIE(pn)) 180788bae714SAlexander Duyck break; 1808104616e7SScott Feldman 18090ddcf43dSAlexander Duyck /* resize completed node */ 18100ddcf43dSAlexander Duyck pn = resize(t, pn); 1811104616e7SScott Feldman cindex = get_index(pkey, pn); 181288bae714SAlexander Duyck 181388bae714SAlexander Duyck continue; 1814104616e7SScott Feldman } 1815104616e7SScott Feldman 1816104616e7SScott Feldman /* grab the next available node */ 1817754baf8dSAlexander Duyck n = get_child(pn, cindex); 181888bae714SAlexander Duyck if (!n) 181988bae714SAlexander Duyck continue; 182088bae714SAlexander Duyck 182188bae714SAlexander Duyck if (IS_TNODE(n)) { 182288bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 182388bae714SAlexander Duyck pn = n; 182488bae714SAlexander Duyck cindex = 1ul << n->bits; 182588bae714SAlexander Duyck 182688bae714SAlexander Duyck continue; 1827104616e7SScott Feldman } 1828104616e7SScott Feldman 182988bae714SAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 1830104616e7SScott Feldman struct fib_info *fi = fa->fa_info; 1831104616e7SScott Feldman 18320ddcf43dSAlexander Duyck /* if alias was cloned to local then we just 18330ddcf43dSAlexander Duyck * need to remove the local copy from main 18340ddcf43dSAlexander Duyck */ 18350ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 18360ddcf43dSAlexander Duyck hlist_del_rcu(&fa->fa_list); 18370ddcf43dSAlexander Duyck alias_free_mem_rcu(fa); 18380ddcf43dSAlexander Duyck continue; 18390ddcf43dSAlexander Duyck } 18400ddcf43dSAlexander Duyck 18410ddcf43dSAlexander Duyck /* record local slen */ 18420ddcf43dSAlexander Duyck slen = fa->fa_slen; 18430ddcf43dSAlexander Duyck 1844eea39946SRoopa Prabhu if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) 184572be7260SAlexander Duyck continue; 184672be7260SAlexander Duyck 1847ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1848ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1849ebb9a03aSJiri Pirko tb->tb_id); 1850104616e7SScott Feldman } 18510ddcf43dSAlexander Duyck 18520ddcf43dSAlexander Duyck /* update leaf slen */ 18530ddcf43dSAlexander Duyck n->slen = slen; 18540ddcf43dSAlexander Duyck 18550ddcf43dSAlexander Duyck if (hlist_empty(&n->leaf)) { 18560ddcf43dSAlexander Duyck put_child_root(pn, n->key, NULL); 18570ddcf43dSAlexander Duyck node_free(n); 18580ddcf43dSAlexander Duyck } 185988bae714SAlexander Duyck } 1860104616e7SScott Feldman } 1861104616e7SScott Feldman 18628be33e95SAlexander Duyck /* Caller must hold RTNL. */ 1863b90eb754SJiri Pirko int fib_table_flush(struct net *net, struct fib_table *tb) 186419baf839SRobert Olsson { 186519baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 186688bae714SAlexander Duyck struct key_vector *pn = t->kv; 186788bae714SAlexander Duyck unsigned long cindex = 1; 18687289e6ddSAlexander Duyck struct hlist_node *tmp; 18697289e6ddSAlexander Duyck struct fib_alias *fa; 187082cfbb00SStephen Hemminger int found = 0; 187119baf839SRobert Olsson 18727289e6ddSAlexander Duyck /* walk trie in reverse order */ 187388bae714SAlexander Duyck for (;;) { 187488bae714SAlexander Duyck unsigned char slen = 0; 187588bae714SAlexander Duyck struct key_vector *n; 187688bae714SAlexander Duyck 187788bae714SAlexander Duyck if (!(cindex--)) { 18787289e6ddSAlexander Duyck t_key pkey = pn->key; 18797289e6ddSAlexander Duyck 188088bae714SAlexander Duyck /* cannot resize the trie vector */ 188188bae714SAlexander Duyck if (IS_TRIE(pn)) 188288bae714SAlexander Duyck break; 18837289e6ddSAlexander Duyck 18847289e6ddSAlexander Duyck /* resize completed node */ 188588bae714SAlexander Duyck pn = resize(t, pn); 18867289e6ddSAlexander Duyck cindex = get_index(pkey, pn); 188788bae714SAlexander Duyck 188888bae714SAlexander Duyck continue; 188964c62723SAlexander Duyck } 189064c62723SAlexander Duyck 18917289e6ddSAlexander Duyck /* grab the next available node */ 1892754baf8dSAlexander Duyck n = get_child(pn, cindex); 189388bae714SAlexander Duyck if (!n) 189488bae714SAlexander Duyck continue; 189519baf839SRobert Olsson 189688bae714SAlexander Duyck if (IS_TNODE(n)) { 189788bae714SAlexander Duyck /* record pn and cindex for leaf walking */ 189888bae714SAlexander Duyck pn = n; 189988bae714SAlexander Duyck cindex = 1ul << n->bits; 190088bae714SAlexander Duyck 190188bae714SAlexander Duyck continue; 190288bae714SAlexander Duyck } 19037289e6ddSAlexander Duyck 19047289e6ddSAlexander Duyck hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 19057289e6ddSAlexander Duyck struct fib_info *fi = fa->fa_info; 19067289e6ddSAlexander Duyck 190788bae714SAlexander Duyck if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { 190888bae714SAlexander Duyck slen = fa->fa_slen; 190988bae714SAlexander Duyck continue; 191088bae714SAlexander Duyck } 191188bae714SAlexander Duyck 1912ebb9a03aSJiri Pirko switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, 1913ebb9a03aSJiri Pirko fi, fa->fa_tos, fa->fa_type, 1914ebb9a03aSJiri Pirko tb->tb_id); 1915b90eb754SJiri Pirko call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1916b90eb754SJiri Pirko n->key, 1917b90eb754SJiri Pirko KEYLENGTH - fa->fa_slen, 1918b90eb754SJiri Pirko fi, fa->fa_tos, fa->fa_type, 1919b90eb754SJiri Pirko tb->tb_id, 0); 19207289e6ddSAlexander Duyck hlist_del_rcu(&fa->fa_list); 19217289e6ddSAlexander Duyck fib_release_info(fa->fa_info); 19227289e6ddSAlexander Duyck alias_free_mem_rcu(fa); 19237289e6ddSAlexander Duyck found++; 19247289e6ddSAlexander Duyck } 19257289e6ddSAlexander Duyck 19267289e6ddSAlexander Duyck /* update leaf slen */ 19277289e6ddSAlexander Duyck n->slen = slen; 19287289e6ddSAlexander Duyck 19297289e6ddSAlexander Duyck if (hlist_empty(&n->leaf)) { 193088bae714SAlexander Duyck put_child_root(pn, n->key, NULL); 19317289e6ddSAlexander Duyck node_free(n); 19327289e6ddSAlexander Duyck } 193388bae714SAlexander Duyck } 19347289e6ddSAlexander Duyck 19350c7770c7SStephen Hemminger pr_debug("trie_flush found=%d\n", found); 193619baf839SRobert Olsson return found; 193719baf839SRobert Olsson } 193819baf839SRobert Olsson 1939a7e53531SAlexander Duyck static void __trie_free_rcu(struct rcu_head *head) 19404aa2c466SPavel Emelyanov { 1941a7e53531SAlexander Duyck struct fib_table *tb = container_of(head, struct fib_table, rcu); 19428274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 19438274a97aSAlexander Duyck struct trie *t = (struct trie *)tb->tb_data; 19448274a97aSAlexander Duyck 19450ddcf43dSAlexander Duyck if (tb->tb_data == tb->__data) 19468274a97aSAlexander Duyck free_percpu(t->stats); 19478274a97aSAlexander Duyck #endif /* CONFIG_IP_FIB_TRIE_STATS */ 19484aa2c466SPavel Emelyanov kfree(tb); 19494aa2c466SPavel Emelyanov } 19504aa2c466SPavel Emelyanov 1951a7e53531SAlexander Duyck void fib_free_table(struct fib_table *tb) 1952a7e53531SAlexander Duyck { 1953a7e53531SAlexander Duyck call_rcu(&tb->rcu, __trie_free_rcu); 1954a7e53531SAlexander Duyck } 1955a7e53531SAlexander Duyck 195635c6edacSAlexander Duyck static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 195719baf839SRobert Olsson struct sk_buff *skb, struct netlink_callback *cb) 195819baf839SRobert Olsson { 195979e5ad2cSAlexander Duyck __be32 xkey = htonl(l->key); 196019baf839SRobert Olsson struct fib_alias *fa; 196179e5ad2cSAlexander Duyck int i, s_i; 196219baf839SRobert Olsson 196379e5ad2cSAlexander Duyck s_i = cb->args[4]; 196419baf839SRobert Olsson i = 0; 196519baf839SRobert Olsson 19662373ce1cSRobert Olsson /* rcu_read_lock is hold by caller */ 196779e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 196819baf839SRobert Olsson if (i < s_i) { 196919baf839SRobert Olsson i++; 197019baf839SRobert Olsson continue; 197119baf839SRobert Olsson } 197219baf839SRobert Olsson 19730ddcf43dSAlexander Duyck if (tb->tb_id != fa->tb_id) { 19740ddcf43dSAlexander Duyck i++; 19750ddcf43dSAlexander Duyck continue; 19760ddcf43dSAlexander Duyck } 19770ddcf43dSAlexander Duyck 197815e47304SEric W. Biederman if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 197919baf839SRobert Olsson cb->nlh->nlmsg_seq, 198019baf839SRobert Olsson RTM_NEWROUTE, 198119baf839SRobert Olsson tb->tb_id, 198219baf839SRobert Olsson fa->fa_type, 1983be403ea1SThomas Graf xkey, 19849b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 198519baf839SRobert Olsson fa->fa_tos, 198664347f78SStephen Hemminger fa->fa_info, NLM_F_MULTI) < 0) { 198771d67e66SStephen Hemminger cb->args[4] = i; 198819baf839SRobert Olsson return -1; 198919baf839SRobert Olsson } 1990a88ee229SStephen Hemminger i++; 199119baf839SRobert Olsson } 1992a88ee229SStephen Hemminger 199371d67e66SStephen Hemminger cb->args[4] = i; 199419baf839SRobert Olsson return skb->len; 199519baf839SRobert Olsson } 199619baf839SRobert Olsson 1997a7e53531SAlexander Duyck /* rcu_read_lock needs to be hold by caller from readside */ 199816c6cf8bSStephen Hemminger int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 1999a07f5f50SStephen Hemminger struct netlink_callback *cb) 200019baf839SRobert Olsson { 200119baf839SRobert Olsson struct trie *t = (struct trie *)tb->tb_data; 200288bae714SAlexander Duyck struct key_vector *l, *tp = t->kv; 2003d5ce8a0eSStephen Hemminger /* Dump starting at last key. 2004d5ce8a0eSStephen Hemminger * Note: 0.0.0.0/0 (ie default) is first key. 2005d5ce8a0eSStephen Hemminger */ 20068be33e95SAlexander Duyck int count = cb->args[2]; 20078be33e95SAlexander Duyck t_key key = cb->args[3]; 2008a88ee229SStephen Hemminger 20098be33e95SAlexander Duyck while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 2010a88ee229SStephen Hemminger if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 20118be33e95SAlexander Duyck cb->args[3] = key; 20128be33e95SAlexander Duyck cb->args[2] = count; 201319baf839SRobert Olsson return -1; 201419baf839SRobert Olsson } 2015d5ce8a0eSStephen Hemminger 201671d67e66SStephen Hemminger ++count; 20178be33e95SAlexander Duyck key = l->key + 1; 20188be33e95SAlexander Duyck 201971d67e66SStephen Hemminger memset(&cb->args[4], 0, 202071d67e66SStephen Hemminger sizeof(cb->args) - 4*sizeof(cb->args[0])); 20218be33e95SAlexander Duyck 20228be33e95SAlexander Duyck /* stop loop if key wrapped back to 0 */ 20238be33e95SAlexander Duyck if (key < l->key) 20248be33e95SAlexander Duyck break; 2025a88ee229SStephen Hemminger } 20268be33e95SAlexander Duyck 20278be33e95SAlexander Duyck cb->args[3] = key; 20288be33e95SAlexander Duyck cb->args[2] = count; 20298be33e95SAlexander Duyck 2030a88ee229SStephen Hemminger return skb->len; 2031a88ee229SStephen Hemminger } 203219baf839SRobert Olsson 20335348ba85SDavid S. Miller void __init fib_trie_init(void) 20347f9b8052SStephen Hemminger { 2035a07f5f50SStephen Hemminger fn_alias_kmem = kmem_cache_create("ip_fib_alias", 2036a07f5f50SStephen Hemminger sizeof(struct fib_alias), 2037bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 2038bc3c8c1eSStephen Hemminger 2039bc3c8c1eSStephen Hemminger trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 204041b489fdSAlexander Duyck LEAF_SIZE, 2041bc3c8c1eSStephen Hemminger 0, SLAB_PANIC, NULL); 20427f9b8052SStephen Hemminger } 204319baf839SRobert Olsson 20440ddcf43dSAlexander Duyck struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) 204519baf839SRobert Olsson { 204619baf839SRobert Olsson struct fib_table *tb; 204719baf839SRobert Olsson struct trie *t; 20480ddcf43dSAlexander Duyck size_t sz = sizeof(*tb); 204919baf839SRobert Olsson 20500ddcf43dSAlexander Duyck if (!alias) 20510ddcf43dSAlexander Duyck sz += sizeof(struct trie); 20520ddcf43dSAlexander Duyck 20530ddcf43dSAlexander Duyck tb = kzalloc(sz, GFP_KERNEL); 205451456b29SIan Morris if (!tb) 205519baf839SRobert Olsson return NULL; 205619baf839SRobert Olsson 205719baf839SRobert Olsson tb->tb_id = id; 205821d8c49eSDavid S. Miller tb->tb_num_default = 0; 20590ddcf43dSAlexander Duyck tb->tb_data = (alias ? alias->__data : tb->__data); 20600ddcf43dSAlexander Duyck 20610ddcf43dSAlexander Duyck if (alias) 20620ddcf43dSAlexander Duyck return tb; 206319baf839SRobert Olsson 206419baf839SRobert Olsson t = (struct trie *) tb->tb_data; 206588bae714SAlexander Duyck t->kv[0].pos = KEYLENGTH; 206688bae714SAlexander Duyck t->kv[0].slen = KEYLENGTH; 20678274a97aSAlexander Duyck #ifdef CONFIG_IP_FIB_TRIE_STATS 20688274a97aSAlexander Duyck t->stats = alloc_percpu(struct trie_use_stats); 20698274a97aSAlexander Duyck if (!t->stats) { 20708274a97aSAlexander Duyck kfree(tb); 20718274a97aSAlexander Duyck tb = NULL; 20728274a97aSAlexander Duyck } 20738274a97aSAlexander Duyck #endif 207419baf839SRobert Olsson 207519baf839SRobert Olsson return tb; 207619baf839SRobert Olsson } 207719baf839SRobert Olsson 2078cb7b593cSStephen Hemminger #ifdef CONFIG_PROC_FS 2079cb7b593cSStephen Hemminger /* Depth first Trie walk iterator */ 2080cb7b593cSStephen Hemminger struct fib_trie_iter { 20811c340b2fSDenis V. Lunev struct seq_net_private p; 20823d3b2d25SStephen Hemminger struct fib_table *tb; 208335c6edacSAlexander Duyck struct key_vector *tnode; 2084a034ee3cSEric Dumazet unsigned int index; 2085a034ee3cSEric Dumazet unsigned int depth; 2086cb7b593cSStephen Hemminger }; 208719baf839SRobert Olsson 208835c6edacSAlexander Duyck static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) 208919baf839SRobert Olsson { 209098293e8dSAlexander Duyck unsigned long cindex = iter->index; 209188bae714SAlexander Duyck struct key_vector *pn = iter->tnode; 209288bae714SAlexander Duyck t_key pkey; 20936640e697SEric W. Biederman 2094cb7b593cSStephen Hemminger pr_debug("get_next iter={node=%p index=%d depth=%d}\n", 2095cb7b593cSStephen Hemminger iter->tnode, iter->index, iter->depth); 209619baf839SRobert Olsson 209788bae714SAlexander Duyck while (!IS_TRIE(pn)) { 209888bae714SAlexander Duyck while (cindex < child_length(pn)) { 209988bae714SAlexander Duyck struct key_vector *n = get_child_rcu(pn, cindex++); 210088bae714SAlexander Duyck 210188bae714SAlexander Duyck if (!n) 210288bae714SAlexander Duyck continue; 210388bae714SAlexander Duyck 210419baf839SRobert Olsson if (IS_LEAF(n)) { 210588bae714SAlexander Duyck iter->tnode = pn; 210688bae714SAlexander Duyck iter->index = cindex; 210791b9a277SOlof Johansson } else { 2108cb7b593cSStephen Hemminger /* push down one level */ 2109adaf9816SAlexander Duyck iter->tnode = n; 2110cb7b593cSStephen Hemminger iter->index = 0; 2111cb7b593cSStephen Hemminger ++iter->depth; 211219baf839SRobert Olsson } 211388bae714SAlexander Duyck 2114cb7b593cSStephen Hemminger return n; 211519baf839SRobert Olsson } 211619baf839SRobert Olsson 2117cb7b593cSStephen Hemminger /* Current node exhausted, pop back up */ 211888bae714SAlexander Duyck pkey = pn->key; 211988bae714SAlexander Duyck pn = node_parent_rcu(pn); 212088bae714SAlexander Duyck cindex = get_index(pkey, pn) + 1; 2121cb7b593cSStephen Hemminger --iter->depth; 2122cb7b593cSStephen Hemminger } 2123cb7b593cSStephen Hemminger 212488bae714SAlexander Duyck /* record root node so further searches know we are done */ 212588bae714SAlexander Duyck iter->tnode = pn; 212688bae714SAlexander Duyck iter->index = 0; 212788bae714SAlexander Duyck 2128cb7b593cSStephen Hemminger return NULL; 2129cb7b593cSStephen Hemminger } 2130cb7b593cSStephen Hemminger 213135c6edacSAlexander Duyck static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, 2132cb7b593cSStephen Hemminger struct trie *t) 2133cb7b593cSStephen Hemminger { 2134f38b24c9SFiro Yang struct key_vector *n, *pn; 21355ddf0eb2SRobert Olsson 21365ddf0eb2SRobert Olsson if (!t) 21375ddf0eb2SRobert Olsson return NULL; 21385ddf0eb2SRobert Olsson 2139f38b24c9SFiro Yang pn = t->kv; 214088bae714SAlexander Duyck n = rcu_dereference(pn->tnode[0]); 21413d3b2d25SStephen Hemminger if (!n) 21425ddf0eb2SRobert Olsson return NULL; 2143cb7b593cSStephen Hemminger 21446640e697SEric W. Biederman if (IS_TNODE(n)) { 2145adaf9816SAlexander Duyck iter->tnode = n; 2146cb7b593cSStephen Hemminger iter->index = 0; 21471d25cd6cSRobert Olsson iter->depth = 1; 21486640e697SEric W. Biederman } else { 214988bae714SAlexander Duyck iter->tnode = pn; 21506640e697SEric W. Biederman iter->index = 0; 21516640e697SEric W. Biederman iter->depth = 0; 21526640e697SEric W. Biederman } 21533d3b2d25SStephen Hemminger 2154cb7b593cSStephen Hemminger return n; 2155cb7b593cSStephen Hemminger } 2156cb7b593cSStephen Hemminger 2157cb7b593cSStephen Hemminger static void trie_collect_stats(struct trie *t, struct trie_stat *s) 215819baf839SRobert Olsson { 215935c6edacSAlexander Duyck struct key_vector *n; 2160cb7b593cSStephen Hemminger struct fib_trie_iter iter; 2161cb7b593cSStephen Hemminger 2162cb7b593cSStephen Hemminger memset(s, 0, sizeof(*s)); 216319baf839SRobert Olsson 21642373ce1cSRobert Olsson rcu_read_lock(); 21653d3b2d25SStephen Hemminger for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 2166cb7b593cSStephen Hemminger if (IS_LEAF(n)) { 216779e5ad2cSAlexander Duyck struct fib_alias *fa; 216893672292SStephen Hemminger 216919baf839SRobert Olsson s->leaves++; 2170cb7b593cSStephen Hemminger s->totdepth += iter.depth; 2171cb7b593cSStephen Hemminger if (iter.depth > s->maxdepth) 2172cb7b593cSStephen Hemminger s->maxdepth = iter.depth; 217393672292SStephen Hemminger 217479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) 217593672292SStephen Hemminger ++s->prefixes; 217691b9a277SOlof Johansson } else { 217719baf839SRobert Olsson s->tnodes++; 2178adaf9816SAlexander Duyck if (n->bits < MAX_STAT_DEPTH) 2179adaf9816SAlexander Duyck s->nodesizes[n->bits]++; 21806e22d174SAlexander Duyck s->nullpointers += tn_info(n)->empty_children; 218119baf839SRobert Olsson } 218298293e8dSAlexander Duyck } 21832373ce1cSRobert Olsson rcu_read_unlock(); 218419baf839SRobert Olsson } 218519baf839SRobert Olsson 218619baf839SRobert Olsson /* 218719baf839SRobert Olsson * This outputs /proc/net/fib_triestats 218819baf839SRobert Olsson */ 2189cb7b593cSStephen Hemminger static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 219019baf839SRobert Olsson { 2191a034ee3cSEric Dumazet unsigned int i, max, pointers, bytes, avdepth; 219219baf839SRobert Olsson 219319baf839SRobert Olsson if (stat->leaves) 219419baf839SRobert Olsson avdepth = stat->totdepth*100 / stat->leaves; 219519baf839SRobert Olsson else 219619baf839SRobert Olsson avdepth = 0; 219719baf839SRobert Olsson 2198a07f5f50SStephen Hemminger seq_printf(seq, "\tAver depth: %u.%02d\n", 2199a07f5f50SStephen Hemminger avdepth / 100, avdepth % 100); 2200cb7b593cSStephen Hemminger seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2201cb7b593cSStephen Hemminger 2202cb7b593cSStephen Hemminger seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 220341b489fdSAlexander Duyck bytes = LEAF_SIZE * stat->leaves; 220493672292SStephen Hemminger 220593672292SStephen Hemminger seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 220679e5ad2cSAlexander Duyck bytes += sizeof(struct fib_alias) * stat->prefixes; 220793672292SStephen Hemminger 2208187b5188SStephen Hemminger seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 220941b489fdSAlexander Duyck bytes += TNODE_SIZE(0) * stat->tnodes; 221019baf839SRobert Olsson 221106ef921dSRobert Olsson max = MAX_STAT_DEPTH; 221206ef921dSRobert Olsson while (max > 0 && stat->nodesizes[max-1] == 0) 221319baf839SRobert Olsson max--; 221419baf839SRobert Olsson 2215cb7b593cSStephen Hemminger pointers = 0; 2216f585a991SJerry Snitselaar for (i = 1; i < max; i++) 221719baf839SRobert Olsson if (stat->nodesizes[i] != 0) { 2218187b5188SStephen Hemminger seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 221919baf839SRobert Olsson pointers += (1<<i) * stat->nodesizes[i]; 222019baf839SRobert Olsson } 2221cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2222187b5188SStephen Hemminger seq_printf(seq, "\tPointers: %u\n", pointers); 2223cb7b593cSStephen Hemminger 222435c6edacSAlexander Duyck bytes += sizeof(struct key_vector *) * pointers; 2225187b5188SStephen Hemminger seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2226187b5188SStephen Hemminger seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 222766a2f7fdSStephen Hemminger } 222819baf839SRobert Olsson 222919baf839SRobert Olsson #ifdef CONFIG_IP_FIB_TRIE_STATS 223066a2f7fdSStephen Hemminger static void trie_show_usage(struct seq_file *seq, 22318274a97aSAlexander Duyck const struct trie_use_stats __percpu *stats) 223266a2f7fdSStephen Hemminger { 22338274a97aSAlexander Duyck struct trie_use_stats s = { 0 }; 22348274a97aSAlexander Duyck int cpu; 22358274a97aSAlexander Duyck 22368274a97aSAlexander Duyck /* loop through all of the CPUs and gather up the stats */ 22378274a97aSAlexander Duyck for_each_possible_cpu(cpu) { 22388274a97aSAlexander Duyck const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); 22398274a97aSAlexander Duyck 22408274a97aSAlexander Duyck s.gets += pcpu->gets; 22418274a97aSAlexander Duyck s.backtrack += pcpu->backtrack; 22428274a97aSAlexander Duyck s.semantic_match_passed += pcpu->semantic_match_passed; 22438274a97aSAlexander Duyck s.semantic_match_miss += pcpu->semantic_match_miss; 22448274a97aSAlexander Duyck s.null_node_hit += pcpu->null_node_hit; 22458274a97aSAlexander Duyck s.resize_node_skipped += pcpu->resize_node_skipped; 22468274a97aSAlexander Duyck } 22478274a97aSAlexander Duyck 224866a2f7fdSStephen Hemminger seq_printf(seq, "\nCounters:\n---------\n"); 22498274a97aSAlexander Duyck seq_printf(seq, "gets = %u\n", s.gets); 22508274a97aSAlexander Duyck seq_printf(seq, "backtracks = %u\n", s.backtrack); 2251a07f5f50SStephen Hemminger seq_printf(seq, "semantic match passed = %u\n", 22528274a97aSAlexander Duyck s.semantic_match_passed); 22538274a97aSAlexander Duyck seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); 22548274a97aSAlexander Duyck seq_printf(seq, "null node hit= %u\n", s.null_node_hit); 22558274a97aSAlexander Duyck seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); 225619baf839SRobert Olsson } 225766a2f7fdSStephen Hemminger #endif /* CONFIG_IP_FIB_TRIE_STATS */ 225866a2f7fdSStephen Hemminger 22593d3b2d25SStephen Hemminger static void fib_table_print(struct seq_file *seq, struct fib_table *tb) 2260d717a9a6SStephen Hemminger { 22613d3b2d25SStephen Hemminger if (tb->tb_id == RT_TABLE_LOCAL) 22623d3b2d25SStephen Hemminger seq_puts(seq, "Local:\n"); 22633d3b2d25SStephen Hemminger else if (tb->tb_id == RT_TABLE_MAIN) 22643d3b2d25SStephen Hemminger seq_puts(seq, "Main:\n"); 22653d3b2d25SStephen Hemminger else 22663d3b2d25SStephen Hemminger seq_printf(seq, "Id %d:\n", tb->tb_id); 2267d717a9a6SStephen Hemminger } 226819baf839SRobert Olsson 22693d3b2d25SStephen Hemminger 227019baf839SRobert Olsson static int fib_triestat_seq_show(struct seq_file *seq, void *v) 227119baf839SRobert Olsson { 22721c340b2fSDenis V. Lunev struct net *net = (struct net *)seq->private; 22733d3b2d25SStephen Hemminger unsigned int h; 2274877a9bffSEric W. Biederman 2275d717a9a6SStephen Hemminger seq_printf(seq, 2276a07f5f50SStephen Hemminger "Basic info: size of leaf:" 2277a07f5f50SStephen Hemminger " %Zd bytes, size of tnode: %Zd bytes.\n", 227841b489fdSAlexander Duyck LEAF_SIZE, TNODE_SIZE(0)); 227919baf839SRobert Olsson 22803d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 22813d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 22823d3b2d25SStephen Hemminger struct fib_table *tb; 2283cb7b593cSStephen Hemminger 2284b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 22853d3b2d25SStephen Hemminger struct trie *t = (struct trie *) tb->tb_data; 22863d3b2d25SStephen Hemminger struct trie_stat stat; 22873d3b2d25SStephen Hemminger 22883d3b2d25SStephen Hemminger if (!t) 22893d3b2d25SStephen Hemminger continue; 22903d3b2d25SStephen Hemminger 22913d3b2d25SStephen Hemminger fib_table_print(seq, tb); 22923d3b2d25SStephen Hemminger 22933d3b2d25SStephen Hemminger trie_collect_stats(t, &stat); 22943d3b2d25SStephen Hemminger trie_show_stats(seq, &stat); 22953d3b2d25SStephen Hemminger #ifdef CONFIG_IP_FIB_TRIE_STATS 22968274a97aSAlexander Duyck trie_show_usage(seq, t->stats); 22973d3b2d25SStephen Hemminger #endif 22983d3b2d25SStephen Hemminger } 22993d3b2d25SStephen Hemminger } 2300cb7b593cSStephen Hemminger 230119baf839SRobert Olsson return 0; 230219baf839SRobert Olsson } 230319baf839SRobert Olsson 230419baf839SRobert Olsson static int fib_triestat_seq_open(struct inode *inode, struct file *file) 230519baf839SRobert Olsson { 2306de05c557SPavel Emelyanov return single_open_net(inode, file, fib_triestat_seq_show); 23071c340b2fSDenis V. Lunev } 23081c340b2fSDenis V. Lunev 23099a32144eSArjan van de Ven static const struct file_operations fib_triestat_fops = { 231019baf839SRobert Olsson .owner = THIS_MODULE, 231119baf839SRobert Olsson .open = fib_triestat_seq_open, 231219baf839SRobert Olsson .read = seq_read, 231319baf839SRobert Olsson .llseek = seq_lseek, 2314b6fcbdb4SPavel Emelyanov .release = single_release_net, 231519baf839SRobert Olsson }; 231619baf839SRobert Olsson 231735c6edacSAlexander Duyck static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 231819baf839SRobert Olsson { 23191218854aSYOSHIFUJI Hideaki struct fib_trie_iter *iter = seq->private; 23201218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 2321cb7b593cSStephen Hemminger loff_t idx = 0; 23223d3b2d25SStephen Hemminger unsigned int h; 23233d3b2d25SStephen Hemminger 23243d3b2d25SStephen Hemminger for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 23253d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 23263d3b2d25SStephen Hemminger struct fib_table *tb; 23273d3b2d25SStephen Hemminger 2328b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 232935c6edacSAlexander Duyck struct key_vector *n; 2330cb7b593cSStephen Hemminger 23313d3b2d25SStephen Hemminger for (n = fib_trie_get_first(iter, 23323d3b2d25SStephen Hemminger (struct trie *) tb->tb_data); 23333d3b2d25SStephen Hemminger n; n = fib_trie_get_next(iter)) 23343d3b2d25SStephen Hemminger if (pos == idx++) { 23353d3b2d25SStephen Hemminger iter->tb = tb; 2336cb7b593cSStephen Hemminger return n; 233719baf839SRobert Olsson } 23383d3b2d25SStephen Hemminger } 23393d3b2d25SStephen Hemminger } 234019baf839SRobert Olsson 234119baf839SRobert Olsson return NULL; 234219baf839SRobert Olsson } 234319baf839SRobert Olsson 234419baf839SRobert Olsson static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2345c95aaf9aSStephen Hemminger __acquires(RCU) 234619baf839SRobert Olsson { 2347cb7b593cSStephen Hemminger rcu_read_lock(); 23481218854aSYOSHIFUJI Hideaki return fib_trie_get_idx(seq, *pos); 234919baf839SRobert Olsson } 235019baf839SRobert Olsson 235119baf839SRobert Olsson static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 235219baf839SRobert Olsson { 2353cb7b593cSStephen Hemminger struct fib_trie_iter *iter = seq->private; 23541218854aSYOSHIFUJI Hideaki struct net *net = seq_file_net(seq); 23553d3b2d25SStephen Hemminger struct fib_table *tb = iter->tb; 23563d3b2d25SStephen Hemminger struct hlist_node *tb_node; 23573d3b2d25SStephen Hemminger unsigned int h; 235835c6edacSAlexander Duyck struct key_vector *n; 2359cb7b593cSStephen Hemminger 236019baf839SRobert Olsson ++*pos; 23613d3b2d25SStephen Hemminger /* next node in same table */ 23623d3b2d25SStephen Hemminger n = fib_trie_get_next(iter); 23633d3b2d25SStephen Hemminger if (n) 23643d3b2d25SStephen Hemminger return n; 236591b9a277SOlof Johansson 23663d3b2d25SStephen Hemminger /* walk rest of this hash chain */ 23673d3b2d25SStephen Hemminger h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 23680a5c0475SEric Dumazet while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { 23693d3b2d25SStephen Hemminger tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 23703d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23713d3b2d25SStephen Hemminger if (n) 23723d3b2d25SStephen Hemminger goto found; 23733d3b2d25SStephen Hemminger } 2374cb7b593cSStephen Hemminger 23753d3b2d25SStephen Hemminger /* new hash chain */ 23763d3b2d25SStephen Hemminger while (++h < FIB_TABLE_HASHSZ) { 23773d3b2d25SStephen Hemminger struct hlist_head *head = &net->ipv4.fib_table_hash[h]; 2378b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb_hlist) { 23793d3b2d25SStephen Hemminger n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 23803d3b2d25SStephen Hemminger if (n) 23813d3b2d25SStephen Hemminger goto found; 23823d3b2d25SStephen Hemminger } 23833d3b2d25SStephen Hemminger } 2384cb7b593cSStephen Hemminger return NULL; 23853d3b2d25SStephen Hemminger 23863d3b2d25SStephen Hemminger found: 23873d3b2d25SStephen Hemminger iter->tb = tb; 23883d3b2d25SStephen Hemminger return n; 238919baf839SRobert Olsson } 239019baf839SRobert Olsson 239119baf839SRobert Olsson static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2392c95aaf9aSStephen Hemminger __releases(RCU) 239319baf839SRobert Olsson { 2394cb7b593cSStephen Hemminger rcu_read_unlock(); 239519baf839SRobert Olsson } 239619baf839SRobert Olsson 2397cb7b593cSStephen Hemminger static void seq_indent(struct seq_file *seq, int n) 2398cb7b593cSStephen Hemminger { 2399a034ee3cSEric Dumazet while (n-- > 0) 2400a034ee3cSEric Dumazet seq_puts(seq, " "); 2401cb7b593cSStephen Hemminger } 240219baf839SRobert Olsson 240328d36e37SEric Dumazet static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2404cb7b593cSStephen Hemminger { 2405cb7b593cSStephen Hemminger switch (s) { 2406cb7b593cSStephen Hemminger case RT_SCOPE_UNIVERSE: return "universe"; 2407cb7b593cSStephen Hemminger case RT_SCOPE_SITE: return "site"; 2408cb7b593cSStephen Hemminger case RT_SCOPE_LINK: return "link"; 2409cb7b593cSStephen Hemminger case RT_SCOPE_HOST: return "host"; 2410cb7b593cSStephen Hemminger case RT_SCOPE_NOWHERE: return "nowhere"; 2411cb7b593cSStephen Hemminger default: 241228d36e37SEric Dumazet snprintf(buf, len, "scope=%d", s); 2413cb7b593cSStephen Hemminger return buf; 2414cb7b593cSStephen Hemminger } 2415cb7b593cSStephen Hemminger } 2416cb7b593cSStephen Hemminger 241736cbd3dcSJan Engelhardt static const char *const rtn_type_names[__RTN_MAX] = { 2418cb7b593cSStephen Hemminger [RTN_UNSPEC] = "UNSPEC", 2419cb7b593cSStephen Hemminger [RTN_UNICAST] = "UNICAST", 2420cb7b593cSStephen Hemminger [RTN_LOCAL] = "LOCAL", 2421cb7b593cSStephen Hemminger [RTN_BROADCAST] = "BROADCAST", 2422cb7b593cSStephen Hemminger [RTN_ANYCAST] = "ANYCAST", 2423cb7b593cSStephen Hemminger [RTN_MULTICAST] = "MULTICAST", 2424cb7b593cSStephen Hemminger [RTN_BLACKHOLE] = "BLACKHOLE", 2425cb7b593cSStephen Hemminger [RTN_UNREACHABLE] = "UNREACHABLE", 2426cb7b593cSStephen Hemminger [RTN_PROHIBIT] = "PROHIBIT", 2427cb7b593cSStephen Hemminger [RTN_THROW] = "THROW", 2428cb7b593cSStephen Hemminger [RTN_NAT] = "NAT", 2429cb7b593cSStephen Hemminger [RTN_XRESOLVE] = "XRESOLVE", 2430cb7b593cSStephen Hemminger }; 2431cb7b593cSStephen Hemminger 2432a034ee3cSEric Dumazet static inline const char *rtn_type(char *buf, size_t len, unsigned int t) 2433cb7b593cSStephen Hemminger { 2434cb7b593cSStephen Hemminger if (t < __RTN_MAX && rtn_type_names[t]) 2435cb7b593cSStephen Hemminger return rtn_type_names[t]; 243628d36e37SEric Dumazet snprintf(buf, len, "type %u", t); 2437cb7b593cSStephen Hemminger return buf; 2438cb7b593cSStephen Hemminger } 2439cb7b593cSStephen Hemminger 2440cb7b593cSStephen Hemminger /* Pretty print the trie */ 244119baf839SRobert Olsson static int fib_trie_seq_show(struct seq_file *seq, void *v) 244219baf839SRobert Olsson { 2443cb7b593cSStephen Hemminger const struct fib_trie_iter *iter = seq->private; 244435c6edacSAlexander Duyck struct key_vector *n = v; 244519baf839SRobert Olsson 244688bae714SAlexander Duyck if (IS_TRIE(node_parent_rcu(n))) 24473d3b2d25SStephen Hemminger fib_table_print(seq, iter->tb); 2448095b8501SRobert Olsson 2449095b8501SRobert Olsson if (IS_TNODE(n)) { 2450adaf9816SAlexander Duyck __be32 prf = htonl(n->key); 2451095b8501SRobert Olsson 24521d25cd6cSRobert Olsson seq_indent(seq, iter->depth-1); 2453e9b44019SAlexander Duyck seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", 2454e9b44019SAlexander Duyck &prf, KEYLENGTH - n->pos - n->bits, n->bits, 24556e22d174SAlexander Duyck tn_info(n)->full_children, 24566e22d174SAlexander Duyck tn_info(n)->empty_children); 2457cb7b593cSStephen Hemminger } else { 2458adaf9816SAlexander Duyck __be32 val = htonl(n->key); 245979e5ad2cSAlexander Duyck struct fib_alias *fa; 2460cb7b593cSStephen Hemminger 2461cb7b593cSStephen Hemminger seq_indent(seq, iter->depth); 2462673d57e7SHarvey Harrison seq_printf(seq, " |-- %pI4\n", &val); 246328d36e37SEric Dumazet 246479e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { 246528d36e37SEric Dumazet char buf1[32], buf2[32]; 246628d36e37SEric Dumazet 2467cb7b593cSStephen Hemminger seq_indent(seq, iter->depth + 1); 24685786ec60SAlexander Duyck seq_printf(seq, " /%zu %s %s", 24699b6ebad5SAlexander Duyck KEYLENGTH - fa->fa_slen, 247028d36e37SEric Dumazet rtn_scope(buf1, sizeof(buf1), 247137e826c5SDavid S. Miller fa->fa_info->fib_scope), 247228d36e37SEric Dumazet rtn_type(buf2, sizeof(buf2), 247328d36e37SEric Dumazet fa->fa_type)); 2474cb7b593cSStephen Hemminger if (fa->fa_tos) 2475b9c4d82aSDenis V. Lunev seq_printf(seq, " tos=%d", fa->fa_tos); 2476cb7b593cSStephen Hemminger seq_putc(seq, '\n'); 2477cb7b593cSStephen Hemminger } 2478cb7b593cSStephen Hemminger } 247919baf839SRobert Olsson 248019baf839SRobert Olsson return 0; 248119baf839SRobert Olsson } 248219baf839SRobert Olsson 2483f690808eSStephen Hemminger static const struct seq_operations fib_trie_seq_ops = { 248419baf839SRobert Olsson .start = fib_trie_seq_start, 248519baf839SRobert Olsson .next = fib_trie_seq_next, 248619baf839SRobert Olsson .stop = fib_trie_seq_stop, 248719baf839SRobert Olsson .show = fib_trie_seq_show, 248819baf839SRobert Olsson }; 248919baf839SRobert Olsson 249019baf839SRobert Olsson static int fib_trie_seq_open(struct inode *inode, struct file *file) 249119baf839SRobert Olsson { 24921c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_trie_seq_ops, 2493cf7732e4SPavel Emelyanov sizeof(struct fib_trie_iter)); 249419baf839SRobert Olsson } 249519baf839SRobert Olsson 24969a32144eSArjan van de Ven static const struct file_operations fib_trie_fops = { 249719baf839SRobert Olsson .owner = THIS_MODULE, 249819baf839SRobert Olsson .open = fib_trie_seq_open, 249919baf839SRobert Olsson .read = seq_read, 250019baf839SRobert Olsson .llseek = seq_lseek, 25011c340b2fSDenis V. Lunev .release = seq_release_net, 250219baf839SRobert Olsson }; 250319baf839SRobert Olsson 25048315f5d8SStephen Hemminger struct fib_route_iter { 25058315f5d8SStephen Hemminger struct seq_net_private p; 25068be33e95SAlexander Duyck struct fib_table *main_tb; 250735c6edacSAlexander Duyck struct key_vector *tnode; 25088315f5d8SStephen Hemminger loff_t pos; 25098315f5d8SStephen Hemminger t_key key; 25108315f5d8SStephen Hemminger }; 25118315f5d8SStephen Hemminger 251235c6edacSAlexander Duyck static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, 251335c6edacSAlexander Duyck loff_t pos) 25148315f5d8SStephen Hemminger { 251535c6edacSAlexander Duyck struct key_vector *l, **tp = &iter->tnode; 25168be33e95SAlexander Duyck t_key key; 25178315f5d8SStephen Hemminger 25188be33e95SAlexander Duyck /* use cache location of next-to-find key */ 25198be33e95SAlexander Duyck if (iter->pos > 0 && pos >= iter->pos) { 25208315f5d8SStephen Hemminger pos -= iter->pos; 25218be33e95SAlexander Duyck key = iter->key; 25228be33e95SAlexander Duyck } else { 25238315f5d8SStephen Hemminger iter->pos = 0; 25248be33e95SAlexander Duyck key = 0; 25258315f5d8SStephen Hemminger } 25268315f5d8SStephen Hemminger 25278be33e95SAlexander Duyck while ((l = leaf_walk_rcu(tp, key)) != NULL) { 25288be33e95SAlexander Duyck key = l->key + 1; 25298315f5d8SStephen Hemminger iter->pos++; 25308be33e95SAlexander Duyck 253125b97c01SAndy Whitcroft if (--pos <= 0) 25328be33e95SAlexander Duyck break; 25338be33e95SAlexander Duyck 25348be33e95SAlexander Duyck l = NULL; 25358be33e95SAlexander Duyck 25368be33e95SAlexander Duyck /* handle unlikely case of a key wrap */ 25378be33e95SAlexander Duyck if (!key) 25388be33e95SAlexander Duyck break; 25398315f5d8SStephen Hemminger } 25408315f5d8SStephen Hemminger 25418315f5d8SStephen Hemminger if (l) 25428be33e95SAlexander Duyck iter->key = key; /* remember it */ 25438315f5d8SStephen Hemminger else 25448315f5d8SStephen Hemminger iter->pos = 0; /* forget it */ 25458315f5d8SStephen Hemminger 25468315f5d8SStephen Hemminger return l; 25478315f5d8SStephen Hemminger } 25488315f5d8SStephen Hemminger 25498315f5d8SStephen Hemminger static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) 25508315f5d8SStephen Hemminger __acquires(RCU) 25518315f5d8SStephen Hemminger { 25528315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 25538315f5d8SStephen Hemminger struct fib_table *tb; 25548be33e95SAlexander Duyck struct trie *t; 25558315f5d8SStephen Hemminger 25568315f5d8SStephen Hemminger rcu_read_lock(); 25578be33e95SAlexander Duyck 25581218854aSYOSHIFUJI Hideaki tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); 25598315f5d8SStephen Hemminger if (!tb) 25608315f5d8SStephen Hemminger return NULL; 25618315f5d8SStephen Hemminger 25628be33e95SAlexander Duyck iter->main_tb = tb; 256394d9f1c5SDavid Forster t = (struct trie *)tb->tb_data; 256494d9f1c5SDavid Forster iter->tnode = t->kv; 25658be33e95SAlexander Duyck 25668be33e95SAlexander Duyck if (*pos != 0) 25678be33e95SAlexander Duyck return fib_route_get_idx(iter, *pos); 25688be33e95SAlexander Duyck 25698be33e95SAlexander Duyck iter->pos = 0; 25708be33e95SAlexander Duyck iter->key = 0; 25718be33e95SAlexander Duyck 25728315f5d8SStephen Hemminger return SEQ_START_TOKEN; 25738315f5d8SStephen Hemminger } 25748315f5d8SStephen Hemminger 25758315f5d8SStephen Hemminger static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25768315f5d8SStephen Hemminger { 25778315f5d8SStephen Hemminger struct fib_route_iter *iter = seq->private; 257835c6edacSAlexander Duyck struct key_vector *l = NULL; 25798be33e95SAlexander Duyck t_key key = iter->key; 25808315f5d8SStephen Hemminger 25818315f5d8SStephen Hemminger ++*pos; 25828be33e95SAlexander Duyck 25838be33e95SAlexander Duyck /* only allow key of 0 for start of sequence */ 25848be33e95SAlexander Duyck if ((v == SEQ_START_TOKEN) || key) 25858be33e95SAlexander Duyck l = leaf_walk_rcu(&iter->tnode, key); 25868be33e95SAlexander Duyck 25878be33e95SAlexander Duyck if (l) { 25888be33e95SAlexander Duyck iter->key = l->key + 1; 25898315f5d8SStephen Hemminger iter->pos++; 25908be33e95SAlexander Duyck } else { 25918be33e95SAlexander Duyck iter->pos = 0; 25928315f5d8SStephen Hemminger } 25938315f5d8SStephen Hemminger 25948315f5d8SStephen Hemminger return l; 25958315f5d8SStephen Hemminger } 25968315f5d8SStephen Hemminger 25978315f5d8SStephen Hemminger static void fib_route_seq_stop(struct seq_file *seq, void *v) 25988315f5d8SStephen Hemminger __releases(RCU) 25998315f5d8SStephen Hemminger { 26008315f5d8SStephen Hemminger rcu_read_unlock(); 26018315f5d8SStephen Hemminger } 26028315f5d8SStephen Hemminger 2603a034ee3cSEric Dumazet static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2604cb7b593cSStephen Hemminger { 2605a034ee3cSEric Dumazet unsigned int flags = 0; 2606cb7b593cSStephen Hemminger 2607a034ee3cSEric Dumazet if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) 2608a034ee3cSEric Dumazet flags = RTF_REJECT; 2609cb7b593cSStephen Hemminger if (fi && fi->fib_nh->nh_gw) 2610cb7b593cSStephen Hemminger flags |= RTF_GATEWAY; 261132ab5f80SAl Viro if (mask == htonl(0xFFFFFFFF)) 2612cb7b593cSStephen Hemminger flags |= RTF_HOST; 2613cb7b593cSStephen Hemminger flags |= RTF_UP; 2614cb7b593cSStephen Hemminger return flags; 2615cb7b593cSStephen Hemminger } 2616cb7b593cSStephen Hemminger 2617cb7b593cSStephen Hemminger /* 2618cb7b593cSStephen Hemminger * This outputs /proc/net/route. 2619cb7b593cSStephen Hemminger * The format of the file is not supposed to be changed 2620cb7b593cSStephen Hemminger * and needs to be same as fib_hash output to avoid breaking 2621cb7b593cSStephen Hemminger * legacy utilities 2622cb7b593cSStephen Hemminger */ 2623cb7b593cSStephen Hemminger static int fib_route_seq_show(struct seq_file *seq, void *v) 2624cb7b593cSStephen Hemminger { 2625654eff45SAlexander Duyck struct fib_route_iter *iter = seq->private; 2626654eff45SAlexander Duyck struct fib_table *tb = iter->main_tb; 262779e5ad2cSAlexander Duyck struct fib_alias *fa; 262835c6edacSAlexander Duyck struct key_vector *l = v; 26299b6ebad5SAlexander Duyck __be32 prefix; 2630cb7b593cSStephen Hemminger 2631cb7b593cSStephen Hemminger if (v == SEQ_START_TOKEN) { 2632cb7b593cSStephen Hemminger seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2633cb7b593cSStephen Hemminger "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" 2634cb7b593cSStephen Hemminger "\tWindow\tIRTT"); 2635cb7b593cSStephen Hemminger return 0; 2636cb7b593cSStephen Hemminger } 2637cb7b593cSStephen Hemminger 26389b6ebad5SAlexander Duyck prefix = htonl(l->key); 26399b6ebad5SAlexander Duyck 264079e5ad2cSAlexander Duyck hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 26411371e37dSHerbert Xu const struct fib_info *fi = fa->fa_info; 26429b6ebad5SAlexander Duyck __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); 2643a034ee3cSEric Dumazet unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2644cb7b593cSStephen Hemminger 264579e5ad2cSAlexander Duyck if ((fa->fa_type == RTN_BROADCAST) || 264679e5ad2cSAlexander Duyck (fa->fa_type == RTN_MULTICAST)) 2647cb7b593cSStephen Hemminger continue; 2648cb7b593cSStephen Hemminger 2649654eff45SAlexander Duyck if (fa->tb_id != tb->tb_id) 2650654eff45SAlexander Duyck continue; 2651654eff45SAlexander Duyck 2652652586dfSTetsuo Handa seq_setwidth(seq, 127); 2653652586dfSTetsuo Handa 2654cb7b593cSStephen Hemminger if (fi) 26555e659e4cSPavel Emelyanov seq_printf(seq, 26565e659e4cSPavel Emelyanov "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2657652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2658cb7b593cSStephen Hemminger fi->fib_dev ? fi->fib_dev->name : "*", 2659cb7b593cSStephen Hemminger prefix, 2660cb7b593cSStephen Hemminger fi->fib_nh->nh_gw, flags, 0, 0, 2661cb7b593cSStephen Hemminger fi->fib_priority, 2662cb7b593cSStephen Hemminger mask, 2663a07f5f50SStephen Hemminger (fi->fib_advmss ? 2664a07f5f50SStephen Hemminger fi->fib_advmss + 40 : 0), 2665cb7b593cSStephen Hemminger fi->fib_window, 2666652586dfSTetsuo Handa fi->fib_rtt >> 3); 2667cb7b593cSStephen Hemminger else 26685e659e4cSPavel Emelyanov seq_printf(seq, 26695e659e4cSPavel Emelyanov "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2670652586dfSTetsuo Handa "%d\t%08X\t%d\t%u\t%u", 2671cb7b593cSStephen Hemminger prefix, 0, flags, 0, 0, 0, 2672652586dfSTetsuo Handa mask, 0, 0, 0); 2673cb7b593cSStephen Hemminger 2674652586dfSTetsuo Handa seq_pad(seq, '\n'); 2675cb7b593cSStephen Hemminger } 2676cb7b593cSStephen Hemminger 2677cb7b593cSStephen Hemminger return 0; 2678cb7b593cSStephen Hemminger } 2679cb7b593cSStephen Hemminger 2680f690808eSStephen Hemminger static const struct seq_operations fib_route_seq_ops = { 26818315f5d8SStephen Hemminger .start = fib_route_seq_start, 26828315f5d8SStephen Hemminger .next = fib_route_seq_next, 26838315f5d8SStephen Hemminger .stop = fib_route_seq_stop, 2684cb7b593cSStephen Hemminger .show = fib_route_seq_show, 2685cb7b593cSStephen Hemminger }; 2686cb7b593cSStephen Hemminger 2687cb7b593cSStephen Hemminger static int fib_route_seq_open(struct inode *inode, struct file *file) 2688cb7b593cSStephen Hemminger { 26891c340b2fSDenis V. Lunev return seq_open_net(inode, file, &fib_route_seq_ops, 26908315f5d8SStephen Hemminger sizeof(struct fib_route_iter)); 2691cb7b593cSStephen Hemminger } 2692cb7b593cSStephen Hemminger 26939a32144eSArjan van de Ven static const struct file_operations fib_route_fops = { 2694cb7b593cSStephen Hemminger .owner = THIS_MODULE, 2695cb7b593cSStephen Hemminger .open = fib_route_seq_open, 2696cb7b593cSStephen Hemminger .read = seq_read, 2697cb7b593cSStephen Hemminger .llseek = seq_lseek, 26981c340b2fSDenis V. Lunev .release = seq_release_net, 2699cb7b593cSStephen Hemminger }; 2700cb7b593cSStephen Hemminger 270161a02653SDenis V. Lunev int __net_init fib_proc_init(struct net *net) 270219baf839SRobert Olsson { 2703d4beaa66SGao feng if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) 2704cb7b593cSStephen Hemminger goto out1; 2705cb7b593cSStephen Hemminger 2706d4beaa66SGao feng if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, 270761a02653SDenis V. Lunev &fib_triestat_fops)) 2708cb7b593cSStephen Hemminger goto out2; 2709cb7b593cSStephen Hemminger 2710d4beaa66SGao feng if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) 2711cb7b593cSStephen Hemminger goto out3; 2712cb7b593cSStephen Hemminger 271319baf839SRobert Olsson return 0; 2714cb7b593cSStephen Hemminger 2715cb7b593cSStephen Hemminger out3: 2716ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2717cb7b593cSStephen Hemminger out2: 2718ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2719cb7b593cSStephen Hemminger out1: 2720cb7b593cSStephen Hemminger return -ENOMEM; 272119baf839SRobert Olsson } 272219baf839SRobert Olsson 272361a02653SDenis V. Lunev void __net_exit fib_proc_exit(struct net *net) 272419baf839SRobert Olsson { 2725ece31ffdSGao feng remove_proc_entry("fib_trie", net->proc_net); 2726ece31ffdSGao feng remove_proc_entry("fib_triestat", net->proc_net); 2727ece31ffdSGao feng remove_proc_entry("route", net->proc_net); 272819baf839SRobert Olsson } 272919baf839SRobert Olsson 273019baf839SRobert Olsson #endif /* CONFIG_PROC_FS */ 2731