11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Linux INET6 implementation 31da177e4SLinus Torvalds * Forwarding Information Database 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * Authors: 61da177e4SLinus Torvalds * Pedro Roque <roque@di.fc.ul.pt> 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 91da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 101da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 111da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 128db46f1dSWang Yufen * 131da177e4SLinus Torvalds * Changes: 141da177e4SLinus Torvalds * Yuji SEKIYA @USAGI: Support default route on router node; 151da177e4SLinus Torvalds * remove ip6_null_entry from the top of 161da177e4SLinus Torvalds * routing table. 17c0bece9fSYOSHIFUJI Hideaki * Ville Nuorvala: Fixed routing subtrees. 181da177e4SLinus Torvalds */ 19f3213831SJoe Perches 20f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt 21f3213831SJoe Perches 221da177e4SLinus Torvalds #include <linux/errno.h> 231da177e4SLinus Torvalds #include <linux/types.h> 241da177e4SLinus Torvalds #include <linux/net.h> 251da177e4SLinus Torvalds #include <linux/route.h> 261da177e4SLinus Torvalds #include <linux/netdevice.h> 271da177e4SLinus Torvalds #include <linux/in6.h> 281da177e4SLinus Torvalds #include <linux/init.h> 29c71099acSThomas Graf #include <linux/list.h> 305a0e3ad6STejun Heo #include <linux/slab.h> 311da177e4SLinus Torvalds 321da177e4SLinus Torvalds #include <net/ipv6.h> 331da177e4SLinus Torvalds #include <net/ndisc.h> 341da177e4SLinus Torvalds #include <net/addrconf.h> 3519e42e45SRoopa Prabhu #include <net/lwtunnel.h> 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include <net/ip6_fib.h> 381da177e4SLinus Torvalds #include <net/ip6_route.h> 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds #define RT6_DEBUG 2 411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds #if RT6_DEBUG >= 3 4391df42beSJoe Perches #define RT6_TRACE(x...) pr_debug(x) 441da177e4SLinus Torvalds #else 451da177e4SLinus Torvalds #define RT6_TRACE(x...) do { ; } while (0) 461da177e4SLinus Torvalds #endif 471da177e4SLinus Torvalds 48e18b890bSChristoph Lameter static struct kmem_cache *fib6_node_kmem __read_mostly; 491da177e4SLinus Torvalds 5094b2cfe0SHannes Frederic Sowa struct fib6_cleaner { 5194b2cfe0SHannes Frederic Sowa struct fib6_walker w; 52ec7d43c2SBenjamin Thery struct net *net; 531da177e4SLinus Torvalds int (*func)(struct rt6_info *, void *arg); 54327571cbSHannes Frederic Sowa int sernum; 551da177e4SLinus Torvalds void *arg; 561da177e4SLinus Torvalds }; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 591da177e4SLinus Torvalds #define FWS_INIT FWS_S 601da177e4SLinus Torvalds #else 611da177e4SLinus Torvalds #define FWS_INIT FWS_L 621da177e4SLinus Torvalds #endif 631da177e4SLinus Torvalds 64163cd4e8SDuan Jiong static void fib6_prune_clones(struct net *net, struct fib6_node *fn); 658ed67789SDaniel Lezcano static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 668ed67789SDaniel Lezcano static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 679a03cd8fSMichal Kubeček static int fib6_walk(struct net *net, struct fib6_walker *w); 6894b2cfe0SHannes Frederic Sowa static int fib6_walk_continue(struct fib6_walker *w); 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds /* 711da177e4SLinus Torvalds * A routing update causes an increase of the serial number on the 721da177e4SLinus Torvalds * affected subtree. This allows for cached routes to be asynchronously 731da177e4SLinus Torvalds * tested when modifications are made to the destination cache as a 741da177e4SLinus Torvalds * result of redirects, path MTU changes, etc. 
751da177e4SLinus Torvalds */ 761da177e4SLinus Torvalds 775b7c931dSDaniel Lezcano static void fib6_gc_timer_cb(unsigned long arg); 785b7c931dSDaniel Lezcano 799a03cd8fSMichal Kubeček #define FOR_WALKERS(net, w) \ 809a03cd8fSMichal Kubeček list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) 811da177e4SLinus Torvalds 829a03cd8fSMichal Kubeček static void fib6_walker_link(struct net *net, struct fib6_walker *w) 8390d41122SAdrian Bunk { 849a03cd8fSMichal Kubeček write_lock_bh(&net->ipv6.fib6_walker_lock); 859a03cd8fSMichal Kubeček list_add(&w->lh, &net->ipv6.fib6_walkers); 869a03cd8fSMichal Kubeček write_unlock_bh(&net->ipv6.fib6_walker_lock); 8790d41122SAdrian Bunk } 8890d41122SAdrian Bunk 899a03cd8fSMichal Kubeček static void fib6_walker_unlink(struct net *net, struct fib6_walker *w) 9090d41122SAdrian Bunk { 919a03cd8fSMichal Kubeček write_lock_bh(&net->ipv6.fib6_walker_lock); 92bbef49daSAlexey Dobriyan list_del(&w->lh); 939a03cd8fSMichal Kubeček write_unlock_bh(&net->ipv6.fib6_walker_lock); 9490d41122SAdrian Bunk } 9594b2cfe0SHannes Frederic Sowa 96812918c4SHannes Frederic Sowa static int fib6_new_sernum(struct net *net) 971da177e4SLinus Torvalds { 9842b18706SHannes Frederic Sowa int new, old; 9942b18706SHannes Frederic Sowa 10042b18706SHannes Frederic Sowa do { 101812918c4SHannes Frederic Sowa old = atomic_read(&net->ipv6.fib6_sernum); 10242b18706SHannes Frederic Sowa new = old < INT_MAX ? old + 1 : 1; 103812918c4SHannes Frederic Sowa } while (atomic_cmpxchg(&net->ipv6.fib6_sernum, 104812918c4SHannes Frederic Sowa old, new) != old); 10542b18706SHannes Frederic Sowa return new; 1061da177e4SLinus Torvalds } 1071da177e4SLinus Torvalds 108327571cbSHannes Frederic Sowa enum { 109327571cbSHannes Frederic Sowa FIB6_NO_SERNUM_CHANGE = 0, 110327571cbSHannes Frederic Sowa }; 111327571cbSHannes Frederic Sowa 1121da177e4SLinus Torvalds /* 1131da177e4SLinus Torvalds * Auxiliary address test functions for the radix tree. 
1141da177e4SLinus Torvalds * 1151da177e4SLinus Torvalds * These assume a 32bit processor (although it will work on 1161da177e4SLinus Torvalds * 64bit processors) 1171da177e4SLinus Torvalds */ 1181da177e4SLinus Torvalds 1191da177e4SLinus Torvalds /* 1201da177e4SLinus Torvalds * test bit 1211da177e4SLinus Torvalds */ 12202cdce53SYOSHIFUJI Hideaki / 吉藤英明 #if defined(__LITTLE_ENDIAN) 12302cdce53SYOSHIFUJI Hideaki / 吉藤英明 # define BITOP_BE32_SWIZZLE (0x1F & ~7) 12402cdce53SYOSHIFUJI Hideaki / 吉藤英明 #else 12502cdce53SYOSHIFUJI Hideaki / 吉藤英明 # define BITOP_BE32_SWIZZLE 0 12602cdce53SYOSHIFUJI Hideaki / 吉藤英明 #endif 1271da177e4SLinus Torvalds 12894b2cfe0SHannes Frederic Sowa static __be32 addr_bit_set(const void *token, int fn_bit) 1291da177e4SLinus Torvalds { 130b71d1d42SEric Dumazet const __be32 *addr = token; 13102cdce53SYOSHIFUJI Hideaki / 吉藤英明 /* 13202cdce53SYOSHIFUJI Hideaki / 吉藤英明 * Here, 13302cdce53SYOSHIFUJI Hideaki / 吉藤英明 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) 13402cdce53SYOSHIFUJI Hideaki / 吉藤英明 * is optimized version of 13502cdce53SYOSHIFUJI Hideaki / 吉藤英明 * htonl(1 << ((~fn_bit)&0x1F)) 13602cdce53SYOSHIFUJI Hideaki / 吉藤英明 * See include/asm-generic/bitops/le.h. 13702cdce53SYOSHIFUJI Hideaki / 吉藤英明 */ 1380eae88f3SEric Dumazet return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & 1390eae88f3SEric Dumazet addr[fn_bit >> 5]; 1401da177e4SLinus Torvalds } 1411da177e4SLinus Torvalds 14294b2cfe0SHannes Frederic Sowa static struct fib6_node *node_alloc(void) 1431da177e4SLinus Torvalds { 1441da177e4SLinus Torvalds struct fib6_node *fn; 1451da177e4SLinus Torvalds 146c3762229SRobert P. J. 
Day fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds return fn; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 15194b2cfe0SHannes Frederic Sowa static void node_free(struct fib6_node *fn) 1521da177e4SLinus Torvalds { 1531da177e4SLinus Torvalds kmem_cache_free(fib6_node_kmem, fn); 1541da177e4SLinus Torvalds } 1551da177e4SLinus Torvalds 15670da5b5cSMartin KaFai Lau static void rt6_rcu_free(struct rt6_info *rt) 15770da5b5cSMartin KaFai Lau { 15870da5b5cSMartin KaFai Lau call_rcu(&rt->dst.rcu_head, dst_rcu_free); 15970da5b5cSMartin KaFai Lau } 16070da5b5cSMartin KaFai Lau 161d52d3997SMartin KaFai Lau static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) 162d52d3997SMartin KaFai Lau { 163d52d3997SMartin KaFai Lau int cpu; 164d52d3997SMartin KaFai Lau 165d52d3997SMartin KaFai Lau if (!non_pcpu_rt->rt6i_pcpu) 166d52d3997SMartin KaFai Lau return; 167d52d3997SMartin KaFai Lau 168d52d3997SMartin KaFai Lau for_each_possible_cpu(cpu) { 169d52d3997SMartin KaFai Lau struct rt6_info **ppcpu_rt; 170d52d3997SMartin KaFai Lau struct rt6_info *pcpu_rt; 171d52d3997SMartin KaFai Lau 172d52d3997SMartin KaFai Lau ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); 173d52d3997SMartin KaFai Lau pcpu_rt = *ppcpu_rt; 174d52d3997SMartin KaFai Lau if (pcpu_rt) { 17570da5b5cSMartin KaFai Lau rt6_rcu_free(pcpu_rt); 176d52d3997SMartin KaFai Lau *ppcpu_rt = NULL; 177d52d3997SMartin KaFai Lau } 178d52d3997SMartin KaFai Lau } 1799c7370a1SMartin KaFai Lau 180903ce4abSMartin KaFai Lau free_percpu(non_pcpu_rt->rt6i_pcpu); 1819c7370a1SMartin KaFai Lau non_pcpu_rt->rt6i_pcpu = NULL; 182d52d3997SMartin KaFai Lau } 183d52d3997SMartin KaFai Lau 18494b2cfe0SHannes Frederic Sowa static void rt6_release(struct rt6_info *rt) 1851da177e4SLinus Torvalds { 186d52d3997SMartin KaFai Lau if (atomic_dec_and_test(&rt->rt6i_ref)) { 187d52d3997SMartin KaFai Lau rt6_free_pcpu(rt); 18870da5b5cSMartin KaFai Lau rt6_rcu_free(rt); 1891da177e4SLinus Torvalds 
} 190d52d3997SMartin KaFai Lau } 1911da177e4SLinus Torvalds 19258f09b78SDaniel Lezcano static void fib6_link_table(struct net *net, struct fib6_table *tb) 1931b43af54SPatrick McHardy { 1941b43af54SPatrick McHardy unsigned int h; 1951b43af54SPatrick McHardy 196375216adSThomas Graf /* 197375216adSThomas Graf * Initialize table lock at a single place to give lockdep a key, 198375216adSThomas Graf * tables aren't visible prior to being linked to the list. 199375216adSThomas Graf */ 200375216adSThomas Graf rwlock_init(&tb->tb6_lock); 201375216adSThomas Graf 202a33bc5c1SNeil Horman h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); 2031b43af54SPatrick McHardy 2041b43af54SPatrick McHardy /* 2051b43af54SPatrick McHardy * No protection necessary, this is the only list mutatation 2061b43af54SPatrick McHardy * operation, tables never disappear once they exist. 2071b43af54SPatrick McHardy */ 20858f09b78SDaniel Lezcano hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); 2091b43af54SPatrick McHardy } 2101b43af54SPatrick McHardy 2111b43af54SPatrick McHardy #ifdef CONFIG_IPV6_MULTIPLE_TABLES 212e0b85590SDaniel Lezcano 2138ed67789SDaniel Lezcano static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) 214c71099acSThomas Graf { 215c71099acSThomas Graf struct fib6_table *table; 216c71099acSThomas Graf 217c71099acSThomas Graf table = kzalloc(sizeof(*table), GFP_ATOMIC); 218507c9b1eSDavid S. Miller if (table) { 219c71099acSThomas Graf table->tb6_id = id; 2208ed67789SDaniel Lezcano table->tb6_root.leaf = net->ipv6.ip6_null_entry; 221c71099acSThomas Graf table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 2228e773277SDavid S. 
Miller inet_peer_base_init(&table->tb6_peers); 223c71099acSThomas Graf } 224c71099acSThomas Graf 225c71099acSThomas Graf return table; 226c71099acSThomas Graf } 227c71099acSThomas Graf 22858f09b78SDaniel Lezcano struct fib6_table *fib6_new_table(struct net *net, u32 id) 229c71099acSThomas Graf { 230c71099acSThomas Graf struct fib6_table *tb; 231c71099acSThomas Graf 232c71099acSThomas Graf if (id == 0) 233c71099acSThomas Graf id = RT6_TABLE_MAIN; 23458f09b78SDaniel Lezcano tb = fib6_get_table(net, id); 235c71099acSThomas Graf if (tb) 236c71099acSThomas Graf return tb; 237c71099acSThomas Graf 2388ed67789SDaniel Lezcano tb = fib6_alloc_table(net, id); 239507c9b1eSDavid S. Miller if (tb) 24058f09b78SDaniel Lezcano fib6_link_table(net, tb); 241c71099acSThomas Graf 242c71099acSThomas Graf return tb; 243c71099acSThomas Graf } 244b3b4663cSDavid Ahern EXPORT_SYMBOL_GPL(fib6_new_table); 245c71099acSThomas Graf 24658f09b78SDaniel Lezcano struct fib6_table *fib6_get_table(struct net *net, u32 id) 247c71099acSThomas Graf { 248c71099acSThomas Graf struct fib6_table *tb; 24958f09b78SDaniel Lezcano struct hlist_head *head; 250c71099acSThomas Graf unsigned int h; 251c71099acSThomas Graf 252c71099acSThomas Graf if (id == 0) 253c71099acSThomas Graf id = RT6_TABLE_MAIN; 254a33bc5c1SNeil Horman h = id & (FIB6_TABLE_HASHSZ - 1); 255c71099acSThomas Graf rcu_read_lock(); 25658f09b78SDaniel Lezcano head = &net->ipv6.fib_table_hash[h]; 257b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 258c71099acSThomas Graf if (tb->tb6_id == id) { 259c71099acSThomas Graf rcu_read_unlock(); 260c71099acSThomas Graf return tb; 261c71099acSThomas Graf } 262c71099acSThomas Graf } 263c71099acSThomas Graf rcu_read_unlock(); 264c71099acSThomas Graf 265c71099acSThomas Graf return NULL; 266c71099acSThomas Graf } 267c4850687SDavid Ahern EXPORT_SYMBOL_GPL(fib6_get_table); 268c71099acSThomas Graf 2692c8c1e72SAlexey Dobriyan static void __net_init fib6_tables_init(struct net *net) 
270c71099acSThomas Graf { 27158f09b78SDaniel Lezcano fib6_link_table(net, net->ipv6.fib6_main_tbl); 27258f09b78SDaniel Lezcano fib6_link_table(net, net->ipv6.fib6_local_tbl); 273c71099acSThomas Graf } 274c71099acSThomas Graf #else 275c71099acSThomas Graf 27658f09b78SDaniel Lezcano struct fib6_table *fib6_new_table(struct net *net, u32 id) 277c71099acSThomas Graf { 27858f09b78SDaniel Lezcano return fib6_get_table(net, id); 279c71099acSThomas Graf } 280c71099acSThomas Graf 28158f09b78SDaniel Lezcano struct fib6_table *fib6_get_table(struct net *net, u32 id) 282c71099acSThomas Graf { 28358f09b78SDaniel Lezcano return net->ipv6.fib6_main_tbl; 284c71099acSThomas Graf } 285c71099acSThomas Graf 2864c9483b2SDavid S. Miller struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, 28758f09b78SDaniel Lezcano int flags, pol_lookup_t lookup) 288c71099acSThomas Graf { 289ab997ad4Slucien struct rt6_info *rt; 290ab997ad4Slucien 291ab997ad4Slucien rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); 292ab997ad4Slucien if (rt->rt6i_flags & RTF_REJECT && 293ab997ad4Slucien rt->dst.error == -EAGAIN) { 294ab997ad4Slucien ip6_rt_put(rt); 295ab997ad4Slucien rt = net->ipv6.ip6_null_entry; 296ab997ad4Slucien dst_hold(&rt->dst); 297ab997ad4Slucien } 298ab997ad4Slucien 299ab997ad4Slucien return &rt->dst; 300c71099acSThomas Graf } 301c71099acSThomas Graf 3022c8c1e72SAlexey Dobriyan static void __net_init fib6_tables_init(struct net *net) 303c71099acSThomas Graf { 30458f09b78SDaniel Lezcano fib6_link_table(net, net->ipv6.fib6_main_tbl); 305c71099acSThomas Graf } 306c71099acSThomas Graf 307c71099acSThomas Graf #endif 308c71099acSThomas Graf 30994b2cfe0SHannes Frederic Sowa static int fib6_dump_node(struct fib6_walker *w) 3101b43af54SPatrick McHardy { 3111b43af54SPatrick McHardy int res; 3121b43af54SPatrick McHardy struct rt6_info *rt; 3131b43af54SPatrick McHardy 314d8d1f30bSChangli Gao for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 3151b43af54SPatrick McHardy res = 
rt6_dump_route(rt, w->args); 3161b43af54SPatrick McHardy if (res < 0) { 3171b43af54SPatrick McHardy /* Frame is full, suspend walking */ 3181b43af54SPatrick McHardy w->leaf = rt; 3191b43af54SPatrick McHardy return 1; 3201b43af54SPatrick McHardy } 3211b43af54SPatrick McHardy } 3221b43af54SPatrick McHardy w->leaf = NULL; 3231b43af54SPatrick McHardy return 0; 3241b43af54SPatrick McHardy } 3251b43af54SPatrick McHardy 3261b43af54SPatrick McHardy static void fib6_dump_end(struct netlink_callback *cb) 3271b43af54SPatrick McHardy { 3289a03cd8fSMichal Kubeček struct net *net = sock_net(cb->skb->sk); 32994b2cfe0SHannes Frederic Sowa struct fib6_walker *w = (void *)cb->args[2]; 3301b43af54SPatrick McHardy 3311b43af54SPatrick McHardy if (w) { 3327891cc81SHerbert Xu if (cb->args[4]) { 3337891cc81SHerbert Xu cb->args[4] = 0; 3349a03cd8fSMichal Kubeček fib6_walker_unlink(net, w); 3357891cc81SHerbert Xu } 3361b43af54SPatrick McHardy cb->args[2] = 0; 3371b43af54SPatrick McHardy kfree(w); 3381b43af54SPatrick McHardy } 3391b43af54SPatrick McHardy cb->done = (void *)cb->args[3]; 3401b43af54SPatrick McHardy cb->args[1] = 3; 3411b43af54SPatrick McHardy } 3421b43af54SPatrick McHardy 3431b43af54SPatrick McHardy static int fib6_dump_done(struct netlink_callback *cb) 3441b43af54SPatrick McHardy { 3451b43af54SPatrick McHardy fib6_dump_end(cb); 3461b43af54SPatrick McHardy return cb->done ? 
cb->done(cb) : 0; 3471b43af54SPatrick McHardy } 3481b43af54SPatrick McHardy 3491b43af54SPatrick McHardy static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, 3501b43af54SPatrick McHardy struct netlink_callback *cb) 3511b43af54SPatrick McHardy { 3529a03cd8fSMichal Kubeček struct net *net = sock_net(skb->sk); 35394b2cfe0SHannes Frederic Sowa struct fib6_walker *w; 3541b43af54SPatrick McHardy int res; 3551b43af54SPatrick McHardy 3561b43af54SPatrick McHardy w = (void *)cb->args[2]; 3571b43af54SPatrick McHardy w->root = &table->tb6_root; 3581b43af54SPatrick McHardy 3591b43af54SPatrick McHardy if (cb->args[4] == 0) { 3602bec5a36SPatrick McHardy w->count = 0; 3612bec5a36SPatrick McHardy w->skip = 0; 3622bec5a36SPatrick McHardy 3631b43af54SPatrick McHardy read_lock_bh(&table->tb6_lock); 3649a03cd8fSMichal Kubeček res = fib6_walk(net, w); 3651b43af54SPatrick McHardy read_unlock_bh(&table->tb6_lock); 3662bec5a36SPatrick McHardy if (res > 0) { 3671b43af54SPatrick McHardy cb->args[4] = 1; 3682bec5a36SPatrick McHardy cb->args[5] = w->root->fn_sernum; 3692bec5a36SPatrick McHardy } 3701b43af54SPatrick McHardy } else { 3712bec5a36SPatrick McHardy if (cb->args[5] != w->root->fn_sernum) { 3722bec5a36SPatrick McHardy /* Begin at the root if the tree changed */ 3732bec5a36SPatrick McHardy cb->args[5] = w->root->fn_sernum; 3742bec5a36SPatrick McHardy w->state = FWS_INIT; 3752bec5a36SPatrick McHardy w->node = w->root; 3762bec5a36SPatrick McHardy w->skip = w->count; 3772bec5a36SPatrick McHardy } else 3782bec5a36SPatrick McHardy w->skip = 0; 3792bec5a36SPatrick McHardy 3801b43af54SPatrick McHardy read_lock_bh(&table->tb6_lock); 3811b43af54SPatrick McHardy res = fib6_walk_continue(w); 3821b43af54SPatrick McHardy read_unlock_bh(&table->tb6_lock); 3837891cc81SHerbert Xu if (res <= 0) { 3849a03cd8fSMichal Kubeček fib6_walker_unlink(net, w); 3851b43af54SPatrick McHardy cb->args[4] = 0; 3861b43af54SPatrick McHardy } 3877891cc81SHerbert Xu } 3887891cc81SHerbert Xu 
3891b43af54SPatrick McHardy return res; 3901b43af54SPatrick McHardy } 3911b43af54SPatrick McHardy 392c127ea2cSThomas Graf static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 3931b43af54SPatrick McHardy { 3943b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(skb->sk); 3951b43af54SPatrick McHardy unsigned int h, s_h; 3961b43af54SPatrick McHardy unsigned int e = 0, s_e; 3971b43af54SPatrick McHardy struct rt6_rtnl_dump_arg arg; 39894b2cfe0SHannes Frederic Sowa struct fib6_walker *w; 3991b43af54SPatrick McHardy struct fib6_table *tb; 40058f09b78SDaniel Lezcano struct hlist_head *head; 4011b43af54SPatrick McHardy int res = 0; 4021b43af54SPatrick McHardy 4031b43af54SPatrick McHardy s_h = cb->args[0]; 4041b43af54SPatrick McHardy s_e = cb->args[1]; 4051b43af54SPatrick McHardy 4061b43af54SPatrick McHardy w = (void *)cb->args[2]; 407507c9b1eSDavid S. Miller if (!w) { 4081b43af54SPatrick McHardy /* New dump: 4091b43af54SPatrick McHardy * 4101b43af54SPatrick McHardy * 1. hook callback destructor. 4111b43af54SPatrick McHardy */ 4121b43af54SPatrick McHardy cb->args[3] = (long)cb->done; 4131b43af54SPatrick McHardy cb->done = fib6_dump_done; 4141b43af54SPatrick McHardy 4151b43af54SPatrick McHardy /* 4161b43af54SPatrick McHardy * 2. allocate and initialize walker. 4171b43af54SPatrick McHardy */ 4181b43af54SPatrick McHardy w = kzalloc(sizeof(*w), GFP_ATOMIC); 419507c9b1eSDavid S. 
Miller if (!w) 4201b43af54SPatrick McHardy return -ENOMEM; 4211b43af54SPatrick McHardy w->func = fib6_dump_node; 4221b43af54SPatrick McHardy cb->args[2] = (long)w; 4231b43af54SPatrick McHardy } 4241b43af54SPatrick McHardy 4251b43af54SPatrick McHardy arg.skb = skb; 4261b43af54SPatrick McHardy arg.cb = cb; 427191cd582SBrian Haley arg.net = net; 4281b43af54SPatrick McHardy w->args = &arg; 4291b43af54SPatrick McHardy 430e67f88ddSEric Dumazet rcu_read_lock(); 431a33bc5c1SNeil Horman for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 4321b43af54SPatrick McHardy e = 0; 43358f09b78SDaniel Lezcano head = &net->ipv6.fib_table_hash[h]; 434b67bfe0dSSasha Levin hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 4351b43af54SPatrick McHardy if (e < s_e) 4361b43af54SPatrick McHardy goto next; 4371b43af54SPatrick McHardy res = fib6_dump_table(tb, skb, cb); 4381b43af54SPatrick McHardy if (res != 0) 4391b43af54SPatrick McHardy goto out; 4401b43af54SPatrick McHardy next: 4411b43af54SPatrick McHardy e++; 4421b43af54SPatrick McHardy } 4431b43af54SPatrick McHardy } 4441b43af54SPatrick McHardy out: 445e67f88ddSEric Dumazet rcu_read_unlock(); 4461b43af54SPatrick McHardy cb->args[1] = e; 4471b43af54SPatrick McHardy cb->args[0] = h; 4481b43af54SPatrick McHardy 4491b43af54SPatrick McHardy res = res < 0 ? res : skb->len; 4501b43af54SPatrick McHardy if (res <= 0) 4511b43af54SPatrick McHardy fib6_dump_end(cb); 4521b43af54SPatrick McHardy return res; 4531b43af54SPatrick McHardy } 4541da177e4SLinus Torvalds 4551da177e4SLinus Torvalds /* 4561da177e4SLinus Torvalds * Routing Table 4571da177e4SLinus Torvalds * 4581da177e4SLinus Torvalds * return the appropriate node for a routing tree "add" operation 4591da177e4SLinus Torvalds * by either creating and inserting or by returning an existing 4601da177e4SLinus Torvalds * node. 
4611da177e4SLinus Torvalds */ 4621da177e4SLinus Torvalds 4639225b230Sfan.du static struct fib6_node *fib6_add_1(struct fib6_node *root, 4649225b230Sfan.du struct in6_addr *addr, int plen, 4654a287ebaSMatti Vaittinen int offset, int allow_create, 466c8c4d42aSHannes Frederic Sowa int replace_required, int sernum) 4671da177e4SLinus Torvalds { 4681da177e4SLinus Torvalds struct fib6_node *fn, *in, *ln; 4691da177e4SLinus Torvalds struct fib6_node *pn = NULL; 4701da177e4SLinus Torvalds struct rt6key *key; 4711da177e4SLinus Torvalds int bit; 472e69a4adcSAl Viro __be32 dir = 0; 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds RT6_TRACE("fib6_add_1\n"); 4751da177e4SLinus Torvalds 4761da177e4SLinus Torvalds /* insert node in tree */ 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds fn = root; 4791da177e4SLinus Torvalds 4801da177e4SLinus Torvalds do { 4811da177e4SLinus Torvalds key = (struct rt6key *)((u8 *)fn->leaf + offset); 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds /* 4841da177e4SLinus Torvalds * Prefix match 4851da177e4SLinus Torvalds */ 4861da177e4SLinus Torvalds if (plen < fn->fn_bit || 4874a287ebaSMatti Vaittinen !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { 48814df015bSMatti Vaittinen if (!allow_create) { 48914df015bSMatti Vaittinen if (replace_required) { 490f3213831SJoe Perches pr_warn("Can't replace route, no match found\n"); 49114df015bSMatti Vaittinen return ERR_PTR(-ENOENT); 49214df015bSMatti Vaittinen } 493f3213831SJoe Perches pr_warn("NLM_F_CREATE should be set when creating new route\n"); 49414df015bSMatti Vaittinen } 4951da177e4SLinus Torvalds goto insert_above; 4964a287ebaSMatti Vaittinen } 4971da177e4SLinus Torvalds 4981da177e4SLinus Torvalds /* 4991da177e4SLinus Torvalds * Exact match ? 5001da177e4SLinus Torvalds */ 5011da177e4SLinus Torvalds 5021da177e4SLinus Torvalds if (plen == fn->fn_bit) { 5031da177e4SLinus Torvalds /* clean up an intermediate node */ 504507c9b1eSDavid S. 
Miller if (!(fn->fn_flags & RTN_RTINFO)) { 5051da177e4SLinus Torvalds rt6_release(fn->leaf); 5061da177e4SLinus Torvalds fn->leaf = NULL; 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds 5091da177e4SLinus Torvalds fn->fn_sernum = sernum; 5101da177e4SLinus Torvalds 5111da177e4SLinus Torvalds return fn; 5121da177e4SLinus Torvalds } 5131da177e4SLinus Torvalds 5141da177e4SLinus Torvalds /* 5151da177e4SLinus Torvalds * We have more bits to go 5161da177e4SLinus Torvalds */ 5171da177e4SLinus Torvalds 5181da177e4SLinus Torvalds /* Try to walk down on tree. */ 5191da177e4SLinus Torvalds fn->fn_sernum = sernum; 5201da177e4SLinus Torvalds dir = addr_bit_set(addr, fn->fn_bit); 5211da177e4SLinus Torvalds pn = fn; 5221da177e4SLinus Torvalds fn = dir ? fn->right : fn->left; 5231da177e4SLinus Torvalds } while (fn); 5241da177e4SLinus Torvalds 52514df015bSMatti Vaittinen if (!allow_create) { 5264a287ebaSMatti Vaittinen /* We should not create new node because 5274a287ebaSMatti Vaittinen * NLM_F_REPLACE was specified without NLM_F_CREATE 5284a287ebaSMatti Vaittinen * I assume it is safe to require NLM_F_CREATE when 5294a287ebaSMatti Vaittinen * REPLACE flag is used! Later we may want to remove the 5304a287ebaSMatti Vaittinen * check for replace_required, because according 5314a287ebaSMatti Vaittinen * to netlink specification, NLM_F_CREATE 5324a287ebaSMatti Vaittinen * MUST be specified if new route is created. 5334a287ebaSMatti Vaittinen * That would keep IPv6 consistent with IPv4 5344a287ebaSMatti Vaittinen */ 53514df015bSMatti Vaittinen if (replace_required) { 536f3213831SJoe Perches pr_warn("Can't replace route, no match found\n"); 5374a287ebaSMatti Vaittinen return ERR_PTR(-ENOENT); 5384a287ebaSMatti Vaittinen } 539f3213831SJoe Perches pr_warn("NLM_F_CREATE should be set when creating new route\n"); 54014df015bSMatti Vaittinen } 5411da177e4SLinus Torvalds /* 5421da177e4SLinus Torvalds * We walked to the bottom of tree. 
5431da177e4SLinus Torvalds * Create new leaf node without children. 5441da177e4SLinus Torvalds */ 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds ln = node_alloc(); 5471da177e4SLinus Torvalds 548507c9b1eSDavid S. Miller if (!ln) 549188c517aSLin Ming return ERR_PTR(-ENOMEM); 5501da177e4SLinus Torvalds ln->fn_bit = plen; 5511da177e4SLinus Torvalds 5521da177e4SLinus Torvalds ln->parent = pn; 5531da177e4SLinus Torvalds ln->fn_sernum = sernum; 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds if (dir) 5561da177e4SLinus Torvalds pn->right = ln; 5571da177e4SLinus Torvalds else 5581da177e4SLinus Torvalds pn->left = ln; 5591da177e4SLinus Torvalds 5601da177e4SLinus Torvalds return ln; 5611da177e4SLinus Torvalds 5621da177e4SLinus Torvalds 5631da177e4SLinus Torvalds insert_above: 5641da177e4SLinus Torvalds /* 5651da177e4SLinus Torvalds * split since we don't have a common prefix anymore or 5661da177e4SLinus Torvalds * we have a less significant route. 5671da177e4SLinus Torvalds * we've to insert an intermediate node on the list 5681da177e4SLinus Torvalds * this new node will point to the one we need to create 5691da177e4SLinus Torvalds * and the current 5701da177e4SLinus Torvalds */ 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds pn = fn->parent; 5731da177e4SLinus Torvalds 5741da177e4SLinus Torvalds /* find 1st bit in difference between the 2 addrs. 5751da177e4SLinus Torvalds 576971f359dSYOSHIFUJI Hideaki See comment in __ipv6_addr_diff: bit may be an invalid value, 5771da177e4SLinus Torvalds but if it is >= plen, the value is ignored in any case. 
5781da177e4SLinus Torvalds */ 5791da177e4SLinus Torvalds 5809225b230Sfan.du bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr)); 5811da177e4SLinus Torvalds 5821da177e4SLinus Torvalds /* 5831da177e4SLinus Torvalds * (intermediate)[in] 5841da177e4SLinus Torvalds * / \ 5851da177e4SLinus Torvalds * (new leaf node)[ln] (old node)[fn] 5861da177e4SLinus Torvalds */ 5871da177e4SLinus Torvalds if (plen > bit) { 5881da177e4SLinus Torvalds in = node_alloc(); 5891da177e4SLinus Torvalds ln = node_alloc(); 5901da177e4SLinus Torvalds 591507c9b1eSDavid S. Miller if (!in || !ln) { 5921da177e4SLinus Torvalds if (in) 5931da177e4SLinus Torvalds node_free(in); 5941da177e4SLinus Torvalds if (ln) 5951da177e4SLinus Torvalds node_free(ln); 596188c517aSLin Ming return ERR_PTR(-ENOMEM); 5971da177e4SLinus Torvalds } 5981da177e4SLinus Torvalds 5991da177e4SLinus Torvalds /* 6001da177e4SLinus Torvalds * new intermediate node. 6011da177e4SLinus Torvalds * RTN_RTINFO will 6021da177e4SLinus Torvalds * be off since that an address that chooses one of 6031da177e4SLinus Torvalds * the branches would not match less specific routes 6041da177e4SLinus Torvalds * in the other branch 6051da177e4SLinus Torvalds */ 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds in->fn_bit = bit; 6081da177e4SLinus Torvalds 6091da177e4SLinus Torvalds in->parent = pn; 6101da177e4SLinus Torvalds in->leaf = fn->leaf; 6111da177e4SLinus Torvalds atomic_inc(&in->leaf->rt6i_ref); 6121da177e4SLinus Torvalds 6131da177e4SLinus Torvalds in->fn_sernum = sernum; 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds /* update parent pointer */ 6161da177e4SLinus Torvalds if (dir) 6171da177e4SLinus Torvalds pn->right = in; 6181da177e4SLinus Torvalds else 6191da177e4SLinus Torvalds pn->left = in; 6201da177e4SLinus Torvalds 6211da177e4SLinus Torvalds ln->fn_bit = plen; 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds ln->parent = in; 6241da177e4SLinus Torvalds fn->parent = in; 6251da177e4SLinus Torvalds 6261da177e4SLinus Torvalds 
ln->fn_sernum = sernum; 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds if (addr_bit_set(addr, bit)) { 6291da177e4SLinus Torvalds in->right = ln; 6301da177e4SLinus Torvalds in->left = fn; 6311da177e4SLinus Torvalds } else { 6321da177e4SLinus Torvalds in->left = ln; 6331da177e4SLinus Torvalds in->right = fn; 6341da177e4SLinus Torvalds } 6351da177e4SLinus Torvalds } else { /* plen <= bit */ 6361da177e4SLinus Torvalds 6371da177e4SLinus Torvalds /* 6381da177e4SLinus Torvalds * (new leaf node)[ln] 6391da177e4SLinus Torvalds * / \ 6401da177e4SLinus Torvalds * (old node)[fn] NULL 6411da177e4SLinus Torvalds */ 6421da177e4SLinus Torvalds 6431da177e4SLinus Torvalds ln = node_alloc(); 6441da177e4SLinus Torvalds 645507c9b1eSDavid S. Miller if (!ln) 646188c517aSLin Ming return ERR_PTR(-ENOMEM); 6471da177e4SLinus Torvalds 6481da177e4SLinus Torvalds ln->fn_bit = plen; 6491da177e4SLinus Torvalds 6501da177e4SLinus Torvalds ln->parent = pn; 6511da177e4SLinus Torvalds 6521da177e4SLinus Torvalds ln->fn_sernum = sernum; 6531da177e4SLinus Torvalds 6541da177e4SLinus Torvalds if (dir) 6551da177e4SLinus Torvalds pn->right = ln; 6561da177e4SLinus Torvalds else 6571da177e4SLinus Torvalds pn->left = ln; 6581da177e4SLinus Torvalds 6591da177e4SLinus Torvalds if (addr_bit_set(&key->addr, plen)) 6601da177e4SLinus Torvalds ln->right = fn; 6611da177e4SLinus Torvalds else 6621da177e4SLinus Torvalds ln->left = fn; 6631da177e4SLinus Torvalds 6641da177e4SLinus Torvalds fn->parent = ln; 6651da177e4SLinus Torvalds } 6661da177e4SLinus Torvalds return ln; 6671da177e4SLinus Torvalds } 6681da177e4SLinus Torvalds 66994b2cfe0SHannes Frederic Sowa static bool rt6_qualify_for_ecmp(struct rt6_info *rt) 670307f2fb9SHannes Frederic Sowa { 671307f2fb9SHannes Frederic Sowa return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == 672307f2fb9SHannes Frederic Sowa RTF_GATEWAY; 673307f2fb9SHannes Frederic Sowa } 674307f2fb9SHannes Frederic Sowa 675e715b6d3SFlorian Westphal static void 
fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) 676e5fd387aSMichal Kubeček { 677e715b6d3SFlorian Westphal int i; 678e5fd387aSMichal Kubeček 679e715b6d3SFlorian Westphal for (i = 0; i < RTAX_MAX; i++) { 680e715b6d3SFlorian Westphal if (test_bit(i, mxc->mx_valid)) 681e715b6d3SFlorian Westphal mp[i] = mxc->mx[i]; 682e715b6d3SFlorian Westphal } 683e715b6d3SFlorian Westphal } 684e715b6d3SFlorian Westphal 685e715b6d3SFlorian Westphal static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) 686e715b6d3SFlorian Westphal { 687e715b6d3SFlorian Westphal if (!mxc->mx) 688e715b6d3SFlorian Westphal return 0; 689e715b6d3SFlorian Westphal 690e715b6d3SFlorian Westphal if (dst->flags & DST_HOST) { 691e715b6d3SFlorian Westphal u32 *mp = dst_metrics_write_ptr(dst); 692e715b6d3SFlorian Westphal 6930409c9a5SDaniel Borkmann if (unlikely(!mp)) 694e5fd387aSMichal Kubeček return -ENOMEM; 695e5fd387aSMichal Kubeček 696e715b6d3SFlorian Westphal fib6_copy_metrics(mp, mxc); 697e715b6d3SFlorian Westphal } else { 698e715b6d3SFlorian Westphal dst_init_metrics(dst, mxc->mx, false); 699e5fd387aSMichal Kubeček 700e715b6d3SFlorian Westphal /* We've stolen mx now. */ 701e715b6d3SFlorian Westphal mxc->mx = NULL; 702e5fd387aSMichal Kubeček } 703e715b6d3SFlorian Westphal 704e5fd387aSMichal Kubeček return 0; 705e5fd387aSMichal Kubeček } 706e5fd387aSMichal Kubeček 7076e9e16e6SHannes Frederic Sowa static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, 7086e9e16e6SHannes Frederic Sowa struct net *net) 7096e9e16e6SHannes Frederic Sowa { 7106e9e16e6SHannes Frederic Sowa if (atomic_read(&rt->rt6i_ref) != 1) { 7116e9e16e6SHannes Frederic Sowa /* This route is used as dummy address holder in some split 7126e9e16e6SHannes Frederic Sowa * nodes. It is not leaked, but it still holds other resources, 7136e9e16e6SHannes Frederic Sowa * which must be released in time. 
So, scan ascendant nodes 7146e9e16e6SHannes Frederic Sowa * and replace dummy references to this route with references 7156e9e16e6SHannes Frederic Sowa * to still alive ones. 7166e9e16e6SHannes Frederic Sowa */ 7176e9e16e6SHannes Frederic Sowa while (fn) { 7186e9e16e6SHannes Frederic Sowa if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { 7196e9e16e6SHannes Frederic Sowa fn->leaf = fib6_find_prefix(net, fn); 7206e9e16e6SHannes Frederic Sowa atomic_inc(&fn->leaf->rt6i_ref); 7216e9e16e6SHannes Frederic Sowa rt6_release(rt); 7226e9e16e6SHannes Frederic Sowa } 7236e9e16e6SHannes Frederic Sowa fn = fn->parent; 7246e9e16e6SHannes Frederic Sowa } 7256e9e16e6SHannes Frederic Sowa /* No more references are possible at this point. */ 7266e9e16e6SHannes Frederic Sowa BUG_ON(atomic_read(&rt->rt6i_ref) != 1); 7276e9e16e6SHannes Frederic Sowa } 7286e9e16e6SHannes Frederic Sowa } 7296e9e16e6SHannes Frederic Sowa 7301da177e4SLinus Torvalds /* 7311da177e4SLinus Torvalds * Insert routing information in a node. 7321da177e4SLinus Torvalds */ 7331da177e4SLinus Torvalds 7341da177e4SLinus Torvalds static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 735e715b6d3SFlorian Westphal struct nl_info *info, struct mx6_config *mxc) 7361da177e4SLinus Torvalds { 7371da177e4SLinus Torvalds struct rt6_info *iter = NULL; 7381da177e4SLinus Torvalds struct rt6_info **ins; 73927596472SMichal Kubeček struct rt6_info **fallback_ins = NULL; 740507c9b1eSDavid S. Miller int replace = (info->nlh && 7414a287ebaSMatti Vaittinen (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 742507c9b1eSDavid S. 
Miller int add = (!info->nlh || 7434a287ebaSMatti Vaittinen (info->nlh->nlmsg_flags & NLM_F_CREATE)); 7444a287ebaSMatti Vaittinen int found = 0; 745307f2fb9SHannes Frederic Sowa bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); 746*73483c12SGuillaume Nault u16 nlflags = NLM_F_EXCL; 747e5fd387aSMichal Kubeček int err; 7481da177e4SLinus Torvalds 7491da177e4SLinus Torvalds ins = &fn->leaf; 7501da177e4SLinus Torvalds 751d8d1f30bSChangli Gao for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { 7521da177e4SLinus Torvalds /* 7531da177e4SLinus Torvalds * Search for duplicates 7541da177e4SLinus Torvalds */ 7551da177e4SLinus Torvalds 7561da177e4SLinus Torvalds if (iter->rt6i_metric == rt->rt6i_metric) { 7571da177e4SLinus Torvalds /* 7581da177e4SLinus Torvalds * Same priority level 7591da177e4SLinus Torvalds */ 760507c9b1eSDavid S. Miller if (info->nlh && 7614a287ebaSMatti Vaittinen (info->nlh->nlmsg_flags & NLM_F_EXCL)) 7624a287ebaSMatti Vaittinen return -EEXIST; 763*73483c12SGuillaume Nault 764*73483c12SGuillaume Nault nlflags &= ~NLM_F_EXCL; 7654a287ebaSMatti Vaittinen if (replace) { 76627596472SMichal Kubeček if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { 7674a287ebaSMatti Vaittinen found++; 7684a287ebaSMatti Vaittinen break; 7694a287ebaSMatti Vaittinen } 77027596472SMichal Kubeček if (rt_can_ecmp) 77127596472SMichal Kubeček fallback_ins = fallback_ins ?: ins; 77227596472SMichal Kubeček goto next_iter; 77327596472SMichal Kubeček } 7741da177e4SLinus Torvalds 775d1918542SDavid S. 
Miller if (iter->dst.dev == rt->dst.dev && 7761da177e4SLinus Torvalds iter->rt6i_idev == rt->rt6i_idev && 7771da177e4SLinus Torvalds ipv6_addr_equal(&iter->rt6i_gateway, 7781da177e4SLinus Torvalds &rt->rt6i_gateway)) { 77951ebd318SNicolas Dichtel if (rt->rt6i_nsiblings) 78051ebd318SNicolas Dichtel rt->rt6i_nsiblings = 0; 7811da177e4SLinus Torvalds if (!(iter->rt6i_flags & RTF_EXPIRES)) 7821da177e4SLinus Torvalds return -EEXIST; 7831716a961SGao feng if (!(rt->rt6i_flags & RTF_EXPIRES)) 7841716a961SGao feng rt6_clean_expires(iter); 7851716a961SGao feng else 7861716a961SGao feng rt6_set_expires(iter, rt->dst.expires); 78745e4fd26SMartin KaFai Lau iter->rt6i_pmtu = rt->rt6i_pmtu; 7881da177e4SLinus Torvalds return -EEXIST; 7891da177e4SLinus Torvalds } 79051ebd318SNicolas Dichtel /* If we have the same destination and the same metric, 79151ebd318SNicolas Dichtel * but not the same gateway, then the route we try to 79251ebd318SNicolas Dichtel * add is sibling to this route, increment our counter 79351ebd318SNicolas Dichtel * of siblings, and later we will add our route to the 79451ebd318SNicolas Dichtel * list. 79551ebd318SNicolas Dichtel * Only static routes (which don't have flag 79651ebd318SNicolas Dichtel * RTF_EXPIRES) are used for ECMPv6. 79751ebd318SNicolas Dichtel * 79851ebd318SNicolas Dichtel * To avoid long list, we only had siblings if the 79951ebd318SNicolas Dichtel * route have a gateway. 
80051ebd318SNicolas Dichtel */ 801307f2fb9SHannes Frederic Sowa if (rt_can_ecmp && 802307f2fb9SHannes Frederic Sowa rt6_qualify_for_ecmp(iter)) 80351ebd318SNicolas Dichtel rt->rt6i_nsiblings++; 8041da177e4SLinus Torvalds } 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds if (iter->rt6i_metric > rt->rt6i_metric) 8071da177e4SLinus Torvalds break; 8081da177e4SLinus Torvalds 80927596472SMichal Kubeček next_iter: 810d8d1f30bSChangli Gao ins = &iter->dst.rt6_next; 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 81327596472SMichal Kubeček if (fallback_ins && !found) { 81427596472SMichal Kubeček /* No ECMP-able route found, replace first non-ECMP one */ 81527596472SMichal Kubeček ins = fallback_ins; 81627596472SMichal Kubeček iter = *ins; 81727596472SMichal Kubeček found++; 81827596472SMichal Kubeček } 81927596472SMichal Kubeček 820f11e6659SDavid S. Miller /* Reset round-robin state, if necessary */ 821f11e6659SDavid S. Miller if (ins == &fn->leaf) 822f11e6659SDavid S. Miller fn->rr_ptr = NULL; 823f11e6659SDavid S. Miller 82451ebd318SNicolas Dichtel /* Link this route to others same route. 
*/ 82551ebd318SNicolas Dichtel if (rt->rt6i_nsiblings) { 82651ebd318SNicolas Dichtel unsigned int rt6i_nsiblings; 82751ebd318SNicolas Dichtel struct rt6_info *sibling, *temp_sibling; 82851ebd318SNicolas Dichtel 82951ebd318SNicolas Dichtel /* Find the first route that have the same metric */ 83051ebd318SNicolas Dichtel sibling = fn->leaf; 83151ebd318SNicolas Dichtel while (sibling) { 832307f2fb9SHannes Frederic Sowa if (sibling->rt6i_metric == rt->rt6i_metric && 833307f2fb9SHannes Frederic Sowa rt6_qualify_for_ecmp(sibling)) { 83451ebd318SNicolas Dichtel list_add_tail(&rt->rt6i_siblings, 83551ebd318SNicolas Dichtel &sibling->rt6i_siblings); 83651ebd318SNicolas Dichtel break; 83751ebd318SNicolas Dichtel } 83851ebd318SNicolas Dichtel sibling = sibling->dst.rt6_next; 83951ebd318SNicolas Dichtel } 84051ebd318SNicolas Dichtel /* For each sibling in the list, increment the counter of 84151ebd318SNicolas Dichtel * siblings. BUG() if counters does not match, list of siblings 84251ebd318SNicolas Dichtel * is broken! 
84351ebd318SNicolas Dichtel */ 84451ebd318SNicolas Dichtel rt6i_nsiblings = 0; 84551ebd318SNicolas Dichtel list_for_each_entry_safe(sibling, temp_sibling, 84651ebd318SNicolas Dichtel &rt->rt6i_siblings, rt6i_siblings) { 84751ebd318SNicolas Dichtel sibling->rt6i_nsiblings++; 84851ebd318SNicolas Dichtel BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); 84951ebd318SNicolas Dichtel rt6i_nsiblings++; 85051ebd318SNicolas Dichtel } 85151ebd318SNicolas Dichtel BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); 85251ebd318SNicolas Dichtel } 85351ebd318SNicolas Dichtel 8541da177e4SLinus Torvalds /* 8551da177e4SLinus Torvalds * insert node 8561da177e4SLinus Torvalds */ 8574a287ebaSMatti Vaittinen if (!replace) { 8584a287ebaSMatti Vaittinen if (!add) 859f3213831SJoe Perches pr_warn("NLM_F_CREATE should be set when creating new route\n"); 8601da177e4SLinus Torvalds 8614a287ebaSMatti Vaittinen add: 862*73483c12SGuillaume Nault nlflags |= NLM_F_CREATE; 863e715b6d3SFlorian Westphal err = fib6_commit_metrics(&rt->dst, mxc); 864e5fd387aSMichal Kubeček if (err) 865e5fd387aSMichal Kubeček return err; 866e715b6d3SFlorian Westphal 867d8d1f30bSChangli Gao rt->dst.rt6_next = iter; 8681da177e4SLinus Torvalds *ins = rt; 8691da177e4SLinus Torvalds rt->rt6i_node = fn; 8701da177e4SLinus Torvalds atomic_inc(&rt->rt6i_ref); 871*73483c12SGuillaume Nault inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 872c572872fSBenjamin Thery info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 8731da177e4SLinus Torvalds 874507c9b1eSDavid S. 
Miller if (!(fn->fn_flags & RTN_RTINFO)) { 875c572872fSBenjamin Thery info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 8761da177e4SLinus Torvalds fn->fn_flags |= RTN_RTINFO; 8771da177e4SLinus Torvalds } 8781da177e4SLinus Torvalds 8794a287ebaSMatti Vaittinen } else { 88027596472SMichal Kubeček int nsiblings; 88127596472SMichal Kubeček 8824a287ebaSMatti Vaittinen if (!found) { 8834a287ebaSMatti Vaittinen if (add) 8844a287ebaSMatti Vaittinen goto add; 885f3213831SJoe Perches pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); 8864a287ebaSMatti Vaittinen return -ENOENT; 8874a287ebaSMatti Vaittinen } 888e715b6d3SFlorian Westphal 889e715b6d3SFlorian Westphal err = fib6_commit_metrics(&rt->dst, mxc); 890e5fd387aSMichal Kubeček if (err) 891e5fd387aSMichal Kubeček return err; 892e715b6d3SFlorian Westphal 8934a287ebaSMatti Vaittinen *ins = rt; 8944a287ebaSMatti Vaittinen rt->rt6i_node = fn; 8954a287ebaSMatti Vaittinen rt->dst.rt6_next = iter->dst.rt6_next; 8964a287ebaSMatti Vaittinen atomic_inc(&rt->rt6i_ref); 89737a1d361SRoopa Prabhu inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); 898507c9b1eSDavid S. 
Miller if (!(fn->fn_flags & RTN_RTINFO)) { 8994a287ebaSMatti Vaittinen info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 9004a287ebaSMatti Vaittinen fn->fn_flags |= RTN_RTINFO; 9014a287ebaSMatti Vaittinen } 90227596472SMichal Kubeček nsiblings = iter->rt6i_nsiblings; 9036e9e16e6SHannes Frederic Sowa fib6_purge_rt(iter, fn, info->nl_net); 9046e9e16e6SHannes Frederic Sowa rt6_release(iter); 90527596472SMichal Kubeček 90627596472SMichal Kubeček if (nsiblings) { 90727596472SMichal Kubeček /* Replacing an ECMP route, remove all siblings */ 90827596472SMichal Kubeček ins = &rt->dst.rt6_next; 90927596472SMichal Kubeček iter = *ins; 91027596472SMichal Kubeček while (iter) { 91127596472SMichal Kubeček if (rt6_qualify_for_ecmp(iter)) { 91227596472SMichal Kubeček *ins = iter->dst.rt6_next; 91327596472SMichal Kubeček fib6_purge_rt(iter, fn, info->nl_net); 91427596472SMichal Kubeček rt6_release(iter); 91527596472SMichal Kubeček nsiblings--; 91627596472SMichal Kubeček } else { 91727596472SMichal Kubeček ins = &iter->dst.rt6_next; 91827596472SMichal Kubeček } 91927596472SMichal Kubeček iter = *ins; 92027596472SMichal Kubeček } 92127596472SMichal Kubeček WARN_ON(nsiblings != 0); 92227596472SMichal Kubeček } 9234a287ebaSMatti Vaittinen } 9244a287ebaSMatti Vaittinen 9251da177e4SLinus Torvalds return 0; 9261da177e4SLinus Torvalds } 9271da177e4SLinus Torvalds 92894b2cfe0SHannes Frederic Sowa static void fib6_start_gc(struct net *net, struct rt6_info *rt) 9291da177e4SLinus Torvalds { 930417f28bbSStephen Hemminger if (!timer_pending(&net->ipv6.ip6_fib_timer) && 9311da177e4SLinus Torvalds (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) 932417f28bbSStephen Hemminger mod_timer(&net->ipv6.ip6_fib_timer, 933847499ceSStephen Hemminger jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 9341da177e4SLinus Torvalds } 9351da177e4SLinus Torvalds 93663152fc0SDaniel Lezcano void fib6_force_start_gc(struct net *net) 9371da177e4SLinus Torvalds { 938417f28bbSStephen Hemminger if 
(!timer_pending(&net->ipv6.ip6_fib_timer)) 939417f28bbSStephen Hemminger mod_timer(&net->ipv6.ip6_fib_timer, 940847499ceSStephen Hemminger jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 9411da177e4SLinus Torvalds } 9421da177e4SLinus Torvalds 9431da177e4SLinus Torvalds /* 9441da177e4SLinus Torvalds * Add routing information to the routing tree. 9451da177e4SLinus Torvalds * <destination addr>/<source addr> 9461da177e4SLinus Torvalds * with source addr info in sub-trees 9471da177e4SLinus Torvalds */ 9481da177e4SLinus Torvalds 949e715b6d3SFlorian Westphal int fib6_add(struct fib6_node *root, struct rt6_info *rt, 950e715b6d3SFlorian Westphal struct nl_info *info, struct mx6_config *mxc) 9511da177e4SLinus Torvalds { 95266729e18SYOSHIFUJI Hideaki struct fib6_node *fn, *pn = NULL; 9531da177e4SLinus Torvalds int err = -ENOMEM; 9544a287ebaSMatti Vaittinen int allow_create = 1; 9554a287ebaSMatti Vaittinen int replace_required = 0; 956812918c4SHannes Frederic Sowa int sernum = fib6_new_sernum(info->nl_net); 957507c9b1eSDavid S. Miller 9588e3d5be7SMartin KaFai Lau if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) && 9598e3d5be7SMartin KaFai Lau !atomic_read(&rt->dst.__refcnt))) 9608e3d5be7SMartin KaFai Lau return -EINVAL; 9618e3d5be7SMartin KaFai Lau 962507c9b1eSDavid S. Miller if (info->nlh) { 9634a287ebaSMatti Vaittinen if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) 9644a287ebaSMatti Vaittinen allow_create = 0; 965507c9b1eSDavid S. 
Miller if (info->nlh->nlmsg_flags & NLM_F_REPLACE) 9664a287ebaSMatti Vaittinen replace_required = 1; 9674a287ebaSMatti Vaittinen } 9684a287ebaSMatti Vaittinen if (!allow_create && !replace_required) 969f3213831SJoe Perches pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 9701da177e4SLinus Torvalds 9719225b230Sfan.du fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, 9729225b230Sfan.du offsetof(struct rt6_info, rt6i_dst), allow_create, 973c8c4d42aSHannes Frederic Sowa replace_required, sernum); 9744a287ebaSMatti Vaittinen if (IS_ERR(fn)) { 9754a287ebaSMatti Vaittinen err = PTR_ERR(fn); 976ae7b4e1fSDaniel Borkmann fn = NULL; 9771da177e4SLinus Torvalds goto out; 978188c517aSLin Ming } 9791da177e4SLinus Torvalds 98066729e18SYOSHIFUJI Hideaki pn = fn; 98166729e18SYOSHIFUJI Hideaki 9821da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 9831da177e4SLinus Torvalds if (rt->rt6i_src.plen) { 9841da177e4SLinus Torvalds struct fib6_node *sn; 9851da177e4SLinus Torvalds 986507c9b1eSDavid S. Miller if (!fn->subtree) { 9871da177e4SLinus Torvalds struct fib6_node *sfn; 9881da177e4SLinus Torvalds 9891da177e4SLinus Torvalds /* 9901da177e4SLinus Torvalds * Create subtree. 9911da177e4SLinus Torvalds * 9921da177e4SLinus Torvalds * fn[main tree] 9931da177e4SLinus Torvalds * | 9941da177e4SLinus Torvalds * sfn[subtree root] 9951da177e4SLinus Torvalds * \ 9961da177e4SLinus Torvalds * sn[new leaf node] 9971da177e4SLinus Torvalds */ 9981da177e4SLinus Torvalds 9991da177e4SLinus Torvalds /* Create subtree root node */ 10001da177e4SLinus Torvalds sfn = node_alloc(); 1001507c9b1eSDavid S. 
Miller if (!sfn) 10021da177e4SLinus Torvalds goto st_failure; 10031da177e4SLinus Torvalds 10048ed67789SDaniel Lezcano sfn->leaf = info->nl_net->ipv6.ip6_null_entry; 10058ed67789SDaniel Lezcano atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); 10061da177e4SLinus Torvalds sfn->fn_flags = RTN_ROOT; 1007c8c4d42aSHannes Frederic Sowa sfn->fn_sernum = sernum; 10081da177e4SLinus Torvalds 10091da177e4SLinus Torvalds /* Now add the first leaf node to new subtree */ 10101da177e4SLinus Torvalds 10111da177e4SLinus Torvalds sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 10129225b230Sfan.du rt->rt6i_src.plen, 10134a287ebaSMatti Vaittinen offsetof(struct rt6_info, rt6i_src), 1014c8c4d42aSHannes Frederic Sowa allow_create, replace_required, sernum); 10151da177e4SLinus Torvalds 1016f950c0ecSWei Yongjun if (IS_ERR(sn)) { 10171da177e4SLinus Torvalds /* If it is failed, discard just allocated 10181da177e4SLinus Torvalds root, and then (in st_failure) stale node 10191da177e4SLinus Torvalds in main tree. 10201da177e4SLinus Torvalds */ 10211da177e4SLinus Torvalds node_free(sfn); 1022188c517aSLin Ming err = PTR_ERR(sn); 10231da177e4SLinus Torvalds goto st_failure; 10241da177e4SLinus Torvalds } 10251da177e4SLinus Torvalds 10261da177e4SLinus Torvalds /* Now link new subtree to main tree */ 10271da177e4SLinus Torvalds sfn->parent = fn; 10281da177e4SLinus Torvalds fn->subtree = sfn; 10291da177e4SLinus Torvalds } else { 10301da177e4SLinus Torvalds sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 10319225b230Sfan.du rt->rt6i_src.plen, 10324a287ebaSMatti Vaittinen offsetof(struct rt6_info, rt6i_src), 1033c8c4d42aSHannes Frederic Sowa allow_create, replace_required, sernum); 10341da177e4SLinus Torvalds 10354a287ebaSMatti Vaittinen if (IS_ERR(sn)) { 10364a287ebaSMatti Vaittinen err = PTR_ERR(sn); 10371da177e4SLinus Torvalds goto st_failure; 10381da177e4SLinus Torvalds } 1039188c517aSLin Ming } 10401da177e4SLinus Torvalds 1041507c9b1eSDavid S. 
Miller if (!fn->leaf) { 104266729e18SYOSHIFUJI Hideaki fn->leaf = rt; 104366729e18SYOSHIFUJI Hideaki atomic_inc(&rt->rt6i_ref); 104466729e18SYOSHIFUJI Hideaki } 10451da177e4SLinus Torvalds fn = sn; 10461da177e4SLinus Torvalds } 10471da177e4SLinus Torvalds #endif 10481da177e4SLinus Torvalds 1049e715b6d3SFlorian Westphal err = fib6_add_rt2node(fn, rt, info, mxc); 1050507c9b1eSDavid S. Miller if (!err) { 105163152fc0SDaniel Lezcano fib6_start_gc(info->nl_net, rt); 10521da177e4SLinus Torvalds if (!(rt->rt6i_flags & RTF_CACHE)) 1053163cd4e8SDuan Jiong fib6_prune_clones(info->nl_net, pn); 10548e3d5be7SMartin KaFai Lau rt->dst.flags &= ~DST_NOCACHE; 10551da177e4SLinus Torvalds } 10561da177e4SLinus Torvalds 10571da177e4SLinus Torvalds out: 105866729e18SYOSHIFUJI Hideaki if (err) { 105966729e18SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES 106066729e18SYOSHIFUJI Hideaki /* 106166729e18SYOSHIFUJI Hideaki * If fib6_add_1 has cleared the old leaf pointer in the 106266729e18SYOSHIFUJI Hideaki * super-tree leaf node we have to find a new one for it. 106366729e18SYOSHIFUJI Hideaki */ 10643c051235SDavid S. Miller if (pn != fn && pn->leaf == rt) { 10653c051235SDavid S. Miller pn->leaf = NULL; 10663c051235SDavid S. Miller atomic_dec(&rt->rt6i_ref); 10673c051235SDavid S. 
Miller } 106866729e18SYOSHIFUJI Hideaki if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { 10698ed67789SDaniel Lezcano pn->leaf = fib6_find_prefix(info->nl_net, pn); 107066729e18SYOSHIFUJI Hideaki #if RT6_DEBUG >= 2 107166729e18SYOSHIFUJI Hideaki if (!pn->leaf) { 1072547b792cSIlpo Järvinen WARN_ON(pn->leaf == NULL); 10738ed67789SDaniel Lezcano pn->leaf = info->nl_net->ipv6.ip6_null_entry; 107466729e18SYOSHIFUJI Hideaki } 107566729e18SYOSHIFUJI Hideaki #endif 107666729e18SYOSHIFUJI Hideaki atomic_inc(&pn->leaf->rt6i_ref); 107766729e18SYOSHIFUJI Hideaki } 107866729e18SYOSHIFUJI Hideaki #endif 10798e3d5be7SMartin KaFai Lau if (!(rt->dst.flags & DST_NOCACHE)) 1080d8d1f30bSChangli Gao dst_free(&rt->dst); 108166729e18SYOSHIFUJI Hideaki } 10821da177e4SLinus Torvalds return err; 10831da177e4SLinus Torvalds 10841da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 10851da177e4SLinus Torvalds /* Subtree creation failed, probably main tree node 10861da177e4SLinus Torvalds is orphan. If it is, shoot it. 
10871da177e4SLinus Torvalds */ 10881da177e4SLinus Torvalds st_failure: 10891da177e4SLinus Torvalds if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 10908ed67789SDaniel Lezcano fib6_repair_tree(info->nl_net, fn); 10918e3d5be7SMartin KaFai Lau if (!(rt->dst.flags & DST_NOCACHE)) 1092d8d1f30bSChangli Gao dst_free(&rt->dst); 10931da177e4SLinus Torvalds return err; 10941da177e4SLinus Torvalds #endif 10951da177e4SLinus Torvalds } 10961da177e4SLinus Torvalds 10971da177e4SLinus Torvalds /* 10981da177e4SLinus Torvalds * Routing tree lookup 10991da177e4SLinus Torvalds * 11001da177e4SLinus Torvalds */ 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds struct lookup_args { 11031da177e4SLinus Torvalds int offset; /* key offset on rt6_info */ 1104b71d1d42SEric Dumazet const struct in6_addr *addr; /* search key */ 11051da177e4SLinus Torvalds }; 11061da177e4SLinus Torvalds 11071da177e4SLinus Torvalds static struct fib6_node *fib6_lookup_1(struct fib6_node *root, 11081da177e4SLinus Torvalds struct lookup_args *args) 11091da177e4SLinus Torvalds { 11101da177e4SLinus Torvalds struct fib6_node *fn; 1111e69a4adcSAl Viro __be32 dir; 11121da177e4SLinus Torvalds 1113825e288eSYOSHIFUJI Hideaki if (unlikely(args->offset == 0)) 1114825e288eSYOSHIFUJI Hideaki return NULL; 1115825e288eSYOSHIFUJI Hideaki 11161da177e4SLinus Torvalds /* 11171da177e4SLinus Torvalds * Descend on a tree 11181da177e4SLinus Torvalds */ 11191da177e4SLinus Torvalds 11201da177e4SLinus Torvalds fn = root; 11211da177e4SLinus Torvalds 11221da177e4SLinus Torvalds for (;;) { 11231da177e4SLinus Torvalds struct fib6_node *next; 11241da177e4SLinus Torvalds 11251da177e4SLinus Torvalds dir = addr_bit_set(args->addr, fn->fn_bit); 11261da177e4SLinus Torvalds 11271da177e4SLinus Torvalds next = dir ? 
fn->right : fn->left; 11281da177e4SLinus Torvalds 11291da177e4SLinus Torvalds if (next) { 11301da177e4SLinus Torvalds fn = next; 11311da177e4SLinus Torvalds continue; 11321da177e4SLinus Torvalds } 11331da177e4SLinus Torvalds break; 11341da177e4SLinus Torvalds } 11351da177e4SLinus Torvalds 11363fc5e044SYOSHIFUJI Hideaki while (fn) { 11377fc33165SYOSHIFUJI Hideaki if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { 11381da177e4SLinus Torvalds struct rt6key *key; 11391da177e4SLinus Torvalds 11401da177e4SLinus Torvalds key = (struct rt6key *) ((u8 *) fn->leaf + 11411da177e4SLinus Torvalds args->offset); 11421da177e4SLinus Torvalds 11433fc5e044SYOSHIFUJI Hideaki if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { 11443fc5e044SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES 11453e3be275SHannes Frederic Sowa if (fn->subtree) { 11463e3be275SHannes Frederic Sowa struct fib6_node *sfn; 11473e3be275SHannes Frederic Sowa sfn = fib6_lookup_1(fn->subtree, 11483e3be275SHannes Frederic Sowa args + 1); 11493e3be275SHannes Frederic Sowa if (!sfn) 11503e3be275SHannes Frederic Sowa goto backtrack; 11513e3be275SHannes Frederic Sowa fn = sfn; 11523e3be275SHannes Frederic Sowa } 11533fc5e044SYOSHIFUJI Hideaki #endif 11543e3be275SHannes Frederic Sowa if (fn->fn_flags & RTN_RTINFO) 11551da177e4SLinus Torvalds return fn; 11561da177e4SLinus Torvalds } 11573fc5e044SYOSHIFUJI Hideaki } 11583e3be275SHannes Frederic Sowa #ifdef CONFIG_IPV6_SUBTREES 11593e3be275SHannes Frederic Sowa backtrack: 11603e3be275SHannes Frederic Sowa #endif 11613fc5e044SYOSHIFUJI Hideaki if (fn->fn_flags & RTN_ROOT) 11623fc5e044SYOSHIFUJI Hideaki break; 11631da177e4SLinus Torvalds 11641da177e4SLinus Torvalds fn = fn->parent; 11651da177e4SLinus Torvalds } 11661da177e4SLinus Torvalds 11671da177e4SLinus Torvalds return NULL; 11681da177e4SLinus Torvalds } 11691da177e4SLinus Torvalds 1170b71d1d42SEric Dumazet struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, 1171b71d1d42SEric Dumazet 
const struct in6_addr *saddr) 11721da177e4SLinus Torvalds { 11731da177e4SLinus Torvalds struct fib6_node *fn; 1174825e288eSYOSHIFUJI Hideaki struct lookup_args args[] = { 1175825e288eSYOSHIFUJI Hideaki { 1176825e288eSYOSHIFUJI Hideaki .offset = offsetof(struct rt6_info, rt6i_dst), 1177825e288eSYOSHIFUJI Hideaki .addr = daddr, 1178825e288eSYOSHIFUJI Hideaki }, 11791da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 1180825e288eSYOSHIFUJI Hideaki { 1181825e288eSYOSHIFUJI Hideaki .offset = offsetof(struct rt6_info, rt6i_src), 1182825e288eSYOSHIFUJI Hideaki .addr = saddr, 1183825e288eSYOSHIFUJI Hideaki }, 11841da177e4SLinus Torvalds #endif 1185825e288eSYOSHIFUJI Hideaki { 1186825e288eSYOSHIFUJI Hideaki .offset = 0, /* sentinel */ 1187825e288eSYOSHIFUJI Hideaki } 1188825e288eSYOSHIFUJI Hideaki }; 11891da177e4SLinus Torvalds 1190fefc2a6cSYOSHIFUJI Hideaki fn = fib6_lookup_1(root, daddr ? args : args + 1); 1191507c9b1eSDavid S. Miller if (!fn || fn->fn_flags & RTN_TL_ROOT) 11921da177e4SLinus Torvalds fn = root; 11931da177e4SLinus Torvalds 11941da177e4SLinus Torvalds return fn; 11951da177e4SLinus Torvalds } 11961da177e4SLinus Torvalds 11971da177e4SLinus Torvalds /* 11981da177e4SLinus Torvalds * Get node with specified destination prefix (and source prefix, 11991da177e4SLinus Torvalds * if subtrees are used) 12001da177e4SLinus Torvalds */ 12011da177e4SLinus Torvalds 12021da177e4SLinus Torvalds 12031da177e4SLinus Torvalds static struct fib6_node *fib6_locate_1(struct fib6_node *root, 1204b71d1d42SEric Dumazet const struct in6_addr *addr, 12051da177e4SLinus Torvalds int plen, int offset) 12061da177e4SLinus Torvalds { 12071da177e4SLinus Torvalds struct fib6_node *fn; 12081da177e4SLinus Torvalds 12091da177e4SLinus Torvalds for (fn = root; fn ; ) { 12101da177e4SLinus Torvalds struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); 12111da177e4SLinus Torvalds 12121da177e4SLinus Torvalds /* 12131da177e4SLinus Torvalds * Prefix match 12141da177e4SLinus Torvalds */ 
12151da177e4SLinus Torvalds if (plen < fn->fn_bit || 12161da177e4SLinus Torvalds !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 12171da177e4SLinus Torvalds return NULL; 12181da177e4SLinus Torvalds 12191da177e4SLinus Torvalds if (plen == fn->fn_bit) 12201da177e4SLinus Torvalds return fn; 12211da177e4SLinus Torvalds 12221da177e4SLinus Torvalds /* 12231da177e4SLinus Torvalds * We have more bits to go 12241da177e4SLinus Torvalds */ 12251da177e4SLinus Torvalds if (addr_bit_set(addr, fn->fn_bit)) 12261da177e4SLinus Torvalds fn = fn->right; 12271da177e4SLinus Torvalds else 12281da177e4SLinus Torvalds fn = fn->left; 12291da177e4SLinus Torvalds } 12301da177e4SLinus Torvalds return NULL; 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds struct fib6_node *fib6_locate(struct fib6_node *root, 1234b71d1d42SEric Dumazet const struct in6_addr *daddr, int dst_len, 1235b71d1d42SEric Dumazet const struct in6_addr *saddr, int src_len) 12361da177e4SLinus Torvalds { 12371da177e4SLinus Torvalds struct fib6_node *fn; 12381da177e4SLinus Torvalds 12391da177e4SLinus Torvalds fn = fib6_locate_1(root, daddr, dst_len, 12401da177e4SLinus Torvalds offsetof(struct rt6_info, rt6i_dst)); 12411da177e4SLinus Torvalds 12421da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 12431da177e4SLinus Torvalds if (src_len) { 1244547b792cSIlpo Järvinen WARN_ON(saddr == NULL); 12453fc5e044SYOSHIFUJI Hideaki if (fn && fn->subtree) 12463fc5e044SYOSHIFUJI Hideaki fn = fib6_locate_1(fn->subtree, saddr, src_len, 12471da177e4SLinus Torvalds offsetof(struct rt6_info, rt6i_src)); 12481da177e4SLinus Torvalds } 12491da177e4SLinus Torvalds #endif 12501da177e4SLinus Torvalds 12511da177e4SLinus Torvalds if (fn && fn->fn_flags & RTN_RTINFO) 12521da177e4SLinus Torvalds return fn; 12531da177e4SLinus Torvalds 12541da177e4SLinus Torvalds return NULL; 12551da177e4SLinus Torvalds } 12561da177e4SLinus Torvalds 12571da177e4SLinus Torvalds 12581da177e4SLinus Torvalds /* 12591da177e4SLinus Torvalds 
* Deletion 12601da177e4SLinus Torvalds * 12611da177e4SLinus Torvalds */ 12621da177e4SLinus Torvalds 12638ed67789SDaniel Lezcano static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) 12641da177e4SLinus Torvalds { 12651da177e4SLinus Torvalds if (fn->fn_flags & RTN_ROOT) 12668ed67789SDaniel Lezcano return net->ipv6.ip6_null_entry; 12671da177e4SLinus Torvalds 12681da177e4SLinus Torvalds while (fn) { 12691da177e4SLinus Torvalds if (fn->left) 12701da177e4SLinus Torvalds return fn->left->leaf; 12711da177e4SLinus Torvalds if (fn->right) 12721da177e4SLinus Torvalds return fn->right->leaf; 12731da177e4SLinus Torvalds 12747fc33165SYOSHIFUJI Hideaki fn = FIB6_SUBTREE(fn); 12751da177e4SLinus Torvalds } 12761da177e4SLinus Torvalds return NULL; 12771da177e4SLinus Torvalds } 12781da177e4SLinus Torvalds 12791da177e4SLinus Torvalds /* 12801da177e4SLinus Torvalds * Called to trim the tree of intermediate nodes when possible. "fn" 12811da177e4SLinus Torvalds * is the node we want to try and remove. 
12821da177e4SLinus Torvalds */ 12831da177e4SLinus Torvalds 12848ed67789SDaniel Lezcano static struct fib6_node *fib6_repair_tree(struct net *net, 12858ed67789SDaniel Lezcano struct fib6_node *fn) 12861da177e4SLinus Torvalds { 12871da177e4SLinus Torvalds int children; 12881da177e4SLinus Torvalds int nstate; 12891da177e4SLinus Torvalds struct fib6_node *child, *pn; 129094b2cfe0SHannes Frederic Sowa struct fib6_walker *w; 12911da177e4SLinus Torvalds int iter = 0; 12921da177e4SLinus Torvalds 12931da177e4SLinus Torvalds for (;;) { 12941da177e4SLinus Torvalds RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); 12951da177e4SLinus Torvalds iter++; 12961da177e4SLinus Torvalds 1297547b792cSIlpo Järvinen WARN_ON(fn->fn_flags & RTN_RTINFO); 1298547b792cSIlpo Järvinen WARN_ON(fn->fn_flags & RTN_TL_ROOT); 129953b24b8fSIan Morris WARN_ON(fn->leaf); 13001da177e4SLinus Torvalds 13011da177e4SLinus Torvalds children = 0; 13021da177e4SLinus Torvalds child = NULL; 130349e253e3SWang Yufen if (fn->right) 130449e253e3SWang Yufen child = fn->right, children |= 1; 130549e253e3SWang Yufen if (fn->left) 130649e253e3SWang Yufen child = fn->left, children |= 2; 13071da177e4SLinus Torvalds 13087fc33165SYOSHIFUJI Hideaki if (children == 3 || FIB6_SUBTREE(fn) 13091da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 13101da177e4SLinus Torvalds /* Subtree root (i.e. fn) may have one child */ 13111da177e4SLinus Torvalds || (children && fn->fn_flags & RTN_ROOT) 13121da177e4SLinus Torvalds #endif 13131da177e4SLinus Torvalds ) { 13148ed67789SDaniel Lezcano fn->leaf = fib6_find_prefix(net, fn); 13151da177e4SLinus Torvalds #if RT6_DEBUG >= 2 1316507c9b1eSDavid S. 
Miller if (!fn->leaf) { 1317547b792cSIlpo Järvinen WARN_ON(!fn->leaf); 13188ed67789SDaniel Lezcano fn->leaf = net->ipv6.ip6_null_entry; 13191da177e4SLinus Torvalds } 13201da177e4SLinus Torvalds #endif 13211da177e4SLinus Torvalds atomic_inc(&fn->leaf->rt6i_ref); 13221da177e4SLinus Torvalds return fn->parent; 13231da177e4SLinus Torvalds } 13241da177e4SLinus Torvalds 13251da177e4SLinus Torvalds pn = fn->parent; 13261da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 13277fc33165SYOSHIFUJI Hideaki if (FIB6_SUBTREE(pn) == fn) { 1328547b792cSIlpo Järvinen WARN_ON(!(fn->fn_flags & RTN_ROOT)); 13297fc33165SYOSHIFUJI Hideaki FIB6_SUBTREE(pn) = NULL; 13301da177e4SLinus Torvalds nstate = FWS_L; 13311da177e4SLinus Torvalds } else { 1332547b792cSIlpo Järvinen WARN_ON(fn->fn_flags & RTN_ROOT); 13331da177e4SLinus Torvalds #endif 133449e253e3SWang Yufen if (pn->right == fn) 133549e253e3SWang Yufen pn->right = child; 133649e253e3SWang Yufen else if (pn->left == fn) 133749e253e3SWang Yufen pn->left = child; 13381da177e4SLinus Torvalds #if RT6_DEBUG >= 2 1339547b792cSIlpo Järvinen else 1340547b792cSIlpo Järvinen WARN_ON(1); 13411da177e4SLinus Torvalds #endif 13421da177e4SLinus Torvalds if (child) 13431da177e4SLinus Torvalds child->parent = pn; 13441da177e4SLinus Torvalds nstate = FWS_R; 13451da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 13461da177e4SLinus Torvalds } 13471da177e4SLinus Torvalds #endif 13481da177e4SLinus Torvalds 13499a03cd8fSMichal Kubeček read_lock(&net->ipv6.fib6_walker_lock); 13509a03cd8fSMichal Kubeček FOR_WALKERS(net, w) { 1351507c9b1eSDavid S. 
Miller if (!child) { 13521da177e4SLinus Torvalds if (w->root == fn) { 13531da177e4SLinus Torvalds w->root = w->node = NULL; 13541da177e4SLinus Torvalds RT6_TRACE("W %p adjusted by delroot 1\n", w); 13551da177e4SLinus Torvalds } else if (w->node == fn) { 13561da177e4SLinus Torvalds RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); 13571da177e4SLinus Torvalds w->node = pn; 13581da177e4SLinus Torvalds w->state = nstate; 13591da177e4SLinus Torvalds } 13601da177e4SLinus Torvalds } else { 13611da177e4SLinus Torvalds if (w->root == fn) { 13621da177e4SLinus Torvalds w->root = child; 13631da177e4SLinus Torvalds RT6_TRACE("W %p adjusted by delroot 2\n", w); 13641da177e4SLinus Torvalds } 13651da177e4SLinus Torvalds if (w->node == fn) { 13661da177e4SLinus Torvalds w->node = child; 13671da177e4SLinus Torvalds if (children&2) { 13681da177e4SLinus Torvalds RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 13691da177e4SLinus Torvalds w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; 13701da177e4SLinus Torvalds } else { 13711da177e4SLinus Torvalds RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); 13721da177e4SLinus Torvalds w->state = w->state >= FWS_C ? 
FWS_U : FWS_INIT; 13731da177e4SLinus Torvalds } 13741da177e4SLinus Torvalds } 13751da177e4SLinus Torvalds } 13761da177e4SLinus Torvalds } 13779a03cd8fSMichal Kubeček read_unlock(&net->ipv6.fib6_walker_lock); 13781da177e4SLinus Torvalds 13791da177e4SLinus Torvalds node_free(fn); 13807fc33165SYOSHIFUJI Hideaki if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 13811da177e4SLinus Torvalds return pn; 13821da177e4SLinus Torvalds 13831da177e4SLinus Torvalds rt6_release(pn->leaf); 13841da177e4SLinus Torvalds pn->leaf = NULL; 13851da177e4SLinus Torvalds fn = pn; 13861da177e4SLinus Torvalds } 13871da177e4SLinus Torvalds } 13881da177e4SLinus Torvalds 13891da177e4SLinus Torvalds static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 139086872cb5SThomas Graf struct nl_info *info) 13911da177e4SLinus Torvalds { 139294b2cfe0SHannes Frederic Sowa struct fib6_walker *w; 13931da177e4SLinus Torvalds struct rt6_info *rt = *rtp; 1394c572872fSBenjamin Thery struct net *net = info->nl_net; 13951da177e4SLinus Torvalds 13961da177e4SLinus Torvalds RT6_TRACE("fib6_del_route\n"); 13971da177e4SLinus Torvalds 13981da177e4SLinus Torvalds /* Unlink it */ 1399d8d1f30bSChangli Gao *rtp = rt->dst.rt6_next; 14001da177e4SLinus Torvalds rt->rt6i_node = NULL; 1401c572872fSBenjamin Thery net->ipv6.rt6_stats->fib_rt_entries--; 1402c572872fSBenjamin Thery net->ipv6.rt6_stats->fib_discarded_routes++; 14031da177e4SLinus Torvalds 1404f11e6659SDavid S. Miller /* Reset round-robin state, if necessary */ 1405f11e6659SDavid S. Miller if (fn->rr_ptr == rt) 1406f11e6659SDavid S. Miller fn->rr_ptr = NULL; 1407f11e6659SDavid S. 
Miller 140851ebd318SNicolas Dichtel /* Remove this entry from other siblings */ 140951ebd318SNicolas Dichtel if (rt->rt6i_nsiblings) { 141051ebd318SNicolas Dichtel struct rt6_info *sibling, *next_sibling; 141151ebd318SNicolas Dichtel 141251ebd318SNicolas Dichtel list_for_each_entry_safe(sibling, next_sibling, 141351ebd318SNicolas Dichtel &rt->rt6i_siblings, rt6i_siblings) 141451ebd318SNicolas Dichtel sibling->rt6i_nsiblings--; 141551ebd318SNicolas Dichtel rt->rt6i_nsiblings = 0; 141651ebd318SNicolas Dichtel list_del_init(&rt->rt6i_siblings); 141751ebd318SNicolas Dichtel } 141851ebd318SNicolas Dichtel 14191da177e4SLinus Torvalds /* Adjust walkers */ 14209a03cd8fSMichal Kubeček read_lock(&net->ipv6.fib6_walker_lock); 14219a03cd8fSMichal Kubeček FOR_WALKERS(net, w) { 14221da177e4SLinus Torvalds if (w->state == FWS_C && w->leaf == rt) { 14231da177e4SLinus Torvalds RT6_TRACE("walker %p adjusted by delroute\n", w); 1424d8d1f30bSChangli Gao w->leaf = rt->dst.rt6_next; 1425507c9b1eSDavid S. Miller if (!w->leaf) 14261da177e4SLinus Torvalds w->state = FWS_U; 14271da177e4SLinus Torvalds } 14281da177e4SLinus Torvalds } 14299a03cd8fSMichal Kubeček read_unlock(&net->ipv6.fib6_walker_lock); 14301da177e4SLinus Torvalds 1431d8d1f30bSChangli Gao rt->dst.rt6_next = NULL; 14321da177e4SLinus Torvalds 14331da177e4SLinus Torvalds /* If it was last route, expunge its radix tree node */ 1434507c9b1eSDavid S. 
Miller if (!fn->leaf) { 14351da177e4SLinus Torvalds fn->fn_flags &= ~RTN_RTINFO; 1436c572872fSBenjamin Thery net->ipv6.rt6_stats->fib_route_nodes--; 14378ed67789SDaniel Lezcano fn = fib6_repair_tree(net, fn); 14381da177e4SLinus Torvalds } 14391da177e4SLinus Torvalds 14406e9e16e6SHannes Frederic Sowa fib6_purge_rt(rt, fn, net); 14411da177e4SLinus Torvalds 144237a1d361SRoopa Prabhu inet6_rt_notify(RTM_DELROUTE, rt, info, 0); 14431da177e4SLinus Torvalds rt6_release(rt); 14441da177e4SLinus Torvalds } 14451da177e4SLinus Torvalds 144686872cb5SThomas Graf int fib6_del(struct rt6_info *rt, struct nl_info *info) 14471da177e4SLinus Torvalds { 14488ed67789SDaniel Lezcano struct net *net = info->nl_net; 14491da177e4SLinus Torvalds struct fib6_node *fn = rt->rt6i_node; 14501da177e4SLinus Torvalds struct rt6_info **rtp; 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds #if RT6_DEBUG >= 2 1453d8d1f30bSChangli Gao if (rt->dst.obsolete > 0) { 145453b24b8fSIan Morris WARN_ON(fn); 14551da177e4SLinus Torvalds return -ENOENT; 14561da177e4SLinus Torvalds } 14571da177e4SLinus Torvalds #endif 1458507c9b1eSDavid S. 
Miller if (!fn || rt == net->ipv6.ip6_null_entry) 14591da177e4SLinus Torvalds return -ENOENT; 14601da177e4SLinus Torvalds 1461547b792cSIlpo Järvinen WARN_ON(!(fn->fn_flags & RTN_RTINFO)); 14621da177e4SLinus Torvalds 1463150730d5SYOSHIFUJI Hideaki if (!(rt->rt6i_flags & RTF_CACHE)) { 1464150730d5SYOSHIFUJI Hideaki struct fib6_node *pn = fn; 1465150730d5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES 1466150730d5SYOSHIFUJI Hideaki /* clones of this route might be in another subtree */ 1467150730d5SYOSHIFUJI Hideaki if (rt->rt6i_src.plen) { 1468150730d5SYOSHIFUJI Hideaki while (!(pn->fn_flags & RTN_ROOT)) 1469150730d5SYOSHIFUJI Hideaki pn = pn->parent; 1470150730d5SYOSHIFUJI Hideaki pn = pn->parent; 1471150730d5SYOSHIFUJI Hideaki } 1472150730d5SYOSHIFUJI Hideaki #endif 1473163cd4e8SDuan Jiong fib6_prune_clones(info->nl_net, pn); 1474150730d5SYOSHIFUJI Hideaki } 14751da177e4SLinus Torvalds 14761da177e4SLinus Torvalds /* 14771da177e4SLinus Torvalds * Walk the leaf entries looking for ourself 14781da177e4SLinus Torvalds */ 14791da177e4SLinus Torvalds 1480d8d1f30bSChangli Gao for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { 14811da177e4SLinus Torvalds if (*rtp == rt) { 148286872cb5SThomas Graf fib6_del_route(fn, rtp, info); 14831da177e4SLinus Torvalds return 0; 14841da177e4SLinus Torvalds } 14851da177e4SLinus Torvalds } 14861da177e4SLinus Torvalds return -ENOENT; 14871da177e4SLinus Torvalds } 14881da177e4SLinus Torvalds 14891da177e4SLinus Torvalds /* 14901da177e4SLinus Torvalds * Tree traversal function. 14911da177e4SLinus Torvalds * 14921da177e4SLinus Torvalds * Certainly, it is not interrupt safe. 14931da177e4SLinus Torvalds * However, it is internally reenterable wrt itself and fib6_add/fib6_del. 14941da177e4SLinus Torvalds * It means, that we can modify tree during walking 14951da177e4SLinus Torvalds * and use this function for garbage collection, clone pruning, 14961da177e4SLinus Torvalds * cleaning tree when a device goes down etc. etc. 
14971da177e4SLinus Torvalds * 14981da177e4SLinus Torvalds * It guarantees that every node will be traversed, 14991da177e4SLinus Torvalds * and that it will be traversed only once. 15001da177e4SLinus Torvalds * 15011da177e4SLinus Torvalds * Callback function w->func may return: 15021da177e4SLinus Torvalds * 0 -> continue walking. 15031da177e4SLinus Torvalds * positive value -> walking is suspended (used by tree dumps, 15041da177e4SLinus Torvalds * and probably by gc, if it will be split to several slices) 15051da177e4SLinus Torvalds * negative value -> terminate walking. 15061da177e4SLinus Torvalds * 15071da177e4SLinus Torvalds * The function itself returns: 15081da177e4SLinus Torvalds * 0 -> walk is complete. 15091da177e4SLinus Torvalds * >0 -> walk is incomplete (i.e. suspended) 15101da177e4SLinus Torvalds * <0 -> walk is terminated by an error. 15111da177e4SLinus Torvalds */ 15121da177e4SLinus Torvalds 151394b2cfe0SHannes Frederic Sowa static int fib6_walk_continue(struct fib6_walker *w) 15141da177e4SLinus Torvalds { 15151da177e4SLinus Torvalds struct fib6_node *fn, *pn; 15161da177e4SLinus Torvalds 15171da177e4SLinus Torvalds for (;;) { 15181da177e4SLinus Torvalds fn = w->node; 1519507c9b1eSDavid S. 
Miller if (!fn) 15201da177e4SLinus Torvalds return 0; 15211da177e4SLinus Torvalds 15221da177e4SLinus Torvalds if (w->prune && fn != w->root && 15231da177e4SLinus Torvalds fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { 15241da177e4SLinus Torvalds w->state = FWS_C; 15251da177e4SLinus Torvalds w->leaf = fn->leaf; 15261da177e4SLinus Torvalds } 15271da177e4SLinus Torvalds switch (w->state) { 15281da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 15291da177e4SLinus Torvalds case FWS_S: 15307fc33165SYOSHIFUJI Hideaki if (FIB6_SUBTREE(fn)) { 15317fc33165SYOSHIFUJI Hideaki w->node = FIB6_SUBTREE(fn); 15321da177e4SLinus Torvalds continue; 15331da177e4SLinus Torvalds } 15341da177e4SLinus Torvalds w->state = FWS_L; 15351da177e4SLinus Torvalds #endif 15361da177e4SLinus Torvalds case FWS_L: 15371da177e4SLinus Torvalds if (fn->left) { 15381da177e4SLinus Torvalds w->node = fn->left; 15391da177e4SLinus Torvalds w->state = FWS_INIT; 15401da177e4SLinus Torvalds continue; 15411da177e4SLinus Torvalds } 15421da177e4SLinus Torvalds w->state = FWS_R; 15431da177e4SLinus Torvalds case FWS_R: 15441da177e4SLinus Torvalds if (fn->right) { 15451da177e4SLinus Torvalds w->node = fn->right; 15461da177e4SLinus Torvalds w->state = FWS_INIT; 15471da177e4SLinus Torvalds continue; 15481da177e4SLinus Torvalds } 15491da177e4SLinus Torvalds w->state = FWS_C; 15501da177e4SLinus Torvalds w->leaf = fn->leaf; 15511da177e4SLinus Torvalds case FWS_C: 15521da177e4SLinus Torvalds if (w->leaf && fn->fn_flags & RTN_RTINFO) { 15532bec5a36SPatrick McHardy int err; 15542bec5a36SPatrick McHardy 1555fa809e2fSEric Dumazet if (w->skip) { 1556fa809e2fSEric Dumazet w->skip--; 15571c265854SKumar Sundararajan goto skip; 15582bec5a36SPatrick McHardy } 15592bec5a36SPatrick McHardy 15602bec5a36SPatrick McHardy err = w->func(w); 15611da177e4SLinus Torvalds if (err) 15621da177e4SLinus Torvalds return err; 15632bec5a36SPatrick McHardy 15642bec5a36SPatrick McHardy w->count++; 15651da177e4SLinus Torvalds continue; 
15661da177e4SLinus Torvalds } 15671c265854SKumar Sundararajan skip: 15681da177e4SLinus Torvalds w->state = FWS_U; 15691da177e4SLinus Torvalds case FWS_U: 15701da177e4SLinus Torvalds if (fn == w->root) 15711da177e4SLinus Torvalds return 0; 15721da177e4SLinus Torvalds pn = fn->parent; 15731da177e4SLinus Torvalds w->node = pn; 15741da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 15757fc33165SYOSHIFUJI Hideaki if (FIB6_SUBTREE(pn) == fn) { 1576547b792cSIlpo Järvinen WARN_ON(!(fn->fn_flags & RTN_ROOT)); 15771da177e4SLinus Torvalds w->state = FWS_L; 15781da177e4SLinus Torvalds continue; 15791da177e4SLinus Torvalds } 15801da177e4SLinus Torvalds #endif 15811da177e4SLinus Torvalds if (pn->left == fn) { 15821da177e4SLinus Torvalds w->state = FWS_R; 15831da177e4SLinus Torvalds continue; 15841da177e4SLinus Torvalds } 15851da177e4SLinus Torvalds if (pn->right == fn) { 15861da177e4SLinus Torvalds w->state = FWS_C; 15871da177e4SLinus Torvalds w->leaf = w->node->leaf; 15881da177e4SLinus Torvalds continue; 15891da177e4SLinus Torvalds } 15901da177e4SLinus Torvalds #if RT6_DEBUG >= 2 1591547b792cSIlpo Järvinen WARN_ON(1); 15921da177e4SLinus Torvalds #endif 15931da177e4SLinus Torvalds } 15941da177e4SLinus Torvalds } 15951da177e4SLinus Torvalds } 15961da177e4SLinus Torvalds 15979a03cd8fSMichal Kubeček static int fib6_walk(struct net *net, struct fib6_walker *w) 15981da177e4SLinus Torvalds { 15991da177e4SLinus Torvalds int res; 16001da177e4SLinus Torvalds 16011da177e4SLinus Torvalds w->state = FWS_INIT; 16021da177e4SLinus Torvalds w->node = w->root; 16031da177e4SLinus Torvalds 16049a03cd8fSMichal Kubeček fib6_walker_link(net, w); 16051da177e4SLinus Torvalds res = fib6_walk_continue(w); 16061da177e4SLinus Torvalds if (res <= 0) 16079a03cd8fSMichal Kubeček fib6_walker_unlink(net, w); 16081da177e4SLinus Torvalds return res; 16091da177e4SLinus Torvalds } 16101da177e4SLinus Torvalds 161194b2cfe0SHannes Frederic Sowa static int fib6_clean_node(struct fib6_walker *w) 16121da177e4SLinus 
Torvalds { 16131da177e4SLinus Torvalds int res; 16141da177e4SLinus Torvalds struct rt6_info *rt; 161594b2cfe0SHannes Frederic Sowa struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); 1616ec7d43c2SBenjamin Thery struct nl_info info = { 1617ec7d43c2SBenjamin Thery .nl_net = c->net, 1618ec7d43c2SBenjamin Thery }; 16191da177e4SLinus Torvalds 1620327571cbSHannes Frederic Sowa if (c->sernum != FIB6_NO_SERNUM_CHANGE && 1621327571cbSHannes Frederic Sowa w->node->fn_sernum != c->sernum) 1622327571cbSHannes Frederic Sowa w->node->fn_sernum = c->sernum; 1623327571cbSHannes Frederic Sowa 1624327571cbSHannes Frederic Sowa if (!c->func) { 1625327571cbSHannes Frederic Sowa WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); 1626327571cbSHannes Frederic Sowa w->leaf = NULL; 1627327571cbSHannes Frederic Sowa return 0; 1628327571cbSHannes Frederic Sowa } 1629327571cbSHannes Frederic Sowa 1630d8d1f30bSChangli Gao for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 16311da177e4SLinus Torvalds res = c->func(rt, c->arg); 16321da177e4SLinus Torvalds if (res < 0) { 16331da177e4SLinus Torvalds w->leaf = rt; 1634528c4cebSDenis V. Lunev res = fib6_del(rt, &info); 16351da177e4SLinus Torvalds if (res) { 16361da177e4SLinus Torvalds #if RT6_DEBUG >= 2 163791df42beSJoe Perches pr_debug("%s: del failed: rt=%p@%p err=%d\n", 163891df42beSJoe Perches __func__, rt, rt->rt6i_node, res); 16391da177e4SLinus Torvalds #endif 16401da177e4SLinus Torvalds continue; 16411da177e4SLinus Torvalds } 16421da177e4SLinus Torvalds return 0; 16431da177e4SLinus Torvalds } 1644547b792cSIlpo Järvinen WARN_ON(res != 0); 16451da177e4SLinus Torvalds } 16461da177e4SLinus Torvalds w->leaf = rt; 16471da177e4SLinus Torvalds return 0; 16481da177e4SLinus Torvalds } 16491da177e4SLinus Torvalds 16501da177e4SLinus Torvalds /* 16511da177e4SLinus Torvalds * Convenient frontend to tree walker. 16521da177e4SLinus Torvalds * 16531da177e4SLinus Torvalds * func is called on each route. 
16541da177e4SLinus Torvalds * It may return -1 -> delete this route. 16551da177e4SLinus Torvalds * 0 -> continue walking 16561da177e4SLinus Torvalds * 16571da177e4SLinus Torvalds * prune==1 -> only immediate children of node (certainly, 16581da177e4SLinus Torvalds * ignoring pure split nodes) will be scanned. 16591da177e4SLinus Torvalds */ 16601da177e4SLinus Torvalds 1661ec7d43c2SBenjamin Thery static void fib6_clean_tree(struct net *net, struct fib6_node *root, 16621da177e4SLinus Torvalds int (*func)(struct rt6_info *, void *arg), 1663327571cbSHannes Frederic Sowa bool prune, int sernum, void *arg) 16641da177e4SLinus Torvalds { 166594b2cfe0SHannes Frederic Sowa struct fib6_cleaner c; 16661da177e4SLinus Torvalds 16671da177e4SLinus Torvalds c.w.root = root; 16681da177e4SLinus Torvalds c.w.func = fib6_clean_node; 16691da177e4SLinus Torvalds c.w.prune = prune; 16702bec5a36SPatrick McHardy c.w.count = 0; 16712bec5a36SPatrick McHardy c.w.skip = 0; 16721da177e4SLinus Torvalds c.func = func; 1673327571cbSHannes Frederic Sowa c.sernum = sernum; 16741da177e4SLinus Torvalds c.arg = arg; 1675ec7d43c2SBenjamin Thery c.net = net; 16761da177e4SLinus Torvalds 16779a03cd8fSMichal Kubeček fib6_walk(net, &c.w); 16781da177e4SLinus Torvalds } 16791da177e4SLinus Torvalds 1680327571cbSHannes Frederic Sowa static void __fib6_clean_all(struct net *net, 1681327571cbSHannes Frederic Sowa int (*func)(struct rt6_info *, void *), 1682327571cbSHannes Frederic Sowa int sernum, void *arg) 1683c71099acSThomas Graf { 1684c71099acSThomas Graf struct fib6_table *table; 168558f09b78SDaniel Lezcano struct hlist_head *head; 16861b43af54SPatrick McHardy unsigned int h; 1687c71099acSThomas Graf 16881b43af54SPatrick McHardy rcu_read_lock(); 1689a33bc5c1SNeil Horman for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 1690f3db4851SDaniel Lezcano head = &net->ipv6.fib_table_hash[h]; 1691b67bfe0dSSasha Levin hlist_for_each_entry_rcu(table, head, tb6_hlist) { 1692c71099acSThomas Graf write_lock_bh(&table->tb6_lock); 
1693ec7d43c2SBenjamin Thery fib6_clean_tree(net, &table->tb6_root, 1694327571cbSHannes Frederic Sowa func, false, sernum, arg); 1695c71099acSThomas Graf write_unlock_bh(&table->tb6_lock); 1696c71099acSThomas Graf } 1697c71099acSThomas Graf } 16981b43af54SPatrick McHardy rcu_read_unlock(); 1699c71099acSThomas Graf } 1700c71099acSThomas Graf 1701327571cbSHannes Frederic Sowa void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), 1702327571cbSHannes Frederic Sowa void *arg) 1703327571cbSHannes Frederic Sowa { 1704327571cbSHannes Frederic Sowa __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); 1705327571cbSHannes Frederic Sowa } 1706327571cbSHannes Frederic Sowa 17071da177e4SLinus Torvalds static int fib6_prune_clone(struct rt6_info *rt, void *arg) 17081da177e4SLinus Torvalds { 17091da177e4SLinus Torvalds if (rt->rt6i_flags & RTF_CACHE) { 17101da177e4SLinus Torvalds RT6_TRACE("pruning clone %p\n", rt); 17111da177e4SLinus Torvalds return -1; 17121da177e4SLinus Torvalds } 17131da177e4SLinus Torvalds 17141da177e4SLinus Torvalds return 0; 17151da177e4SLinus Torvalds } 17161da177e4SLinus Torvalds 1717163cd4e8SDuan Jiong static void fib6_prune_clones(struct net *net, struct fib6_node *fn) 17181da177e4SLinus Torvalds { 1719327571cbSHannes Frederic Sowa fib6_clean_tree(net, fn, fib6_prune_clone, true, 1720327571cbSHannes Frederic Sowa FIB6_NO_SERNUM_CHANGE, NULL); 1721705f1c86SHannes Frederic Sowa } 1722705f1c86SHannes Frederic Sowa 1723705f1c86SHannes Frederic Sowa static void fib6_flush_trees(struct net *net) 1724705f1c86SHannes Frederic Sowa { 1725812918c4SHannes Frederic Sowa int new_sernum = fib6_new_sernum(net); 1726705f1c86SHannes Frederic Sowa 1727327571cbSHannes Frederic Sowa __fib6_clean_all(net, NULL, new_sernum, NULL); 1728705f1c86SHannes Frederic Sowa } 1729705f1c86SHannes Frederic Sowa 17301da177e4SLinus Torvalds /* 17311da177e4SLinus Torvalds * Garbage collection 17321da177e4SLinus Torvalds */ 17331da177e4SLinus Torvalds 
17343570df91SMichal Kubeček struct fib6_gc_args 17351da177e4SLinus Torvalds { 17361da177e4SLinus Torvalds int timeout; 17371da177e4SLinus Torvalds int more; 17383570df91SMichal Kubeček }; 17391da177e4SLinus Torvalds 17401da177e4SLinus Torvalds static int fib6_age(struct rt6_info *rt, void *arg) 17411da177e4SLinus Torvalds { 17423570df91SMichal Kubeček struct fib6_gc_args *gc_args = arg; 17431da177e4SLinus Torvalds unsigned long now = jiffies; 17441da177e4SLinus Torvalds 17451da177e4SLinus Torvalds /* 17461da177e4SLinus Torvalds * check addrconf expiration here. 17471da177e4SLinus Torvalds * Routes are expired even if they are in use. 17481da177e4SLinus Torvalds * 17491da177e4SLinus Torvalds * Also age clones. Note, that clones are aged out 17501da177e4SLinus Torvalds * only if they are not in use now. 17511da177e4SLinus Torvalds */ 17521da177e4SLinus Torvalds 1753d1918542SDavid S. Miller if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { 1754d1918542SDavid S. Miller if (time_after(now, rt->dst.expires)) { 17551da177e4SLinus Torvalds RT6_TRACE("expiring %p\n", rt); 17561da177e4SLinus Torvalds return -1; 17571da177e4SLinus Torvalds } 17583570df91SMichal Kubeček gc_args->more++; 17591da177e4SLinus Torvalds } else if (rt->rt6i_flags & RTF_CACHE) { 1760d8d1f30bSChangli Gao if (atomic_read(&rt->dst.__refcnt) == 0 && 17613570df91SMichal Kubeček time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { 17621da177e4SLinus Torvalds RT6_TRACE("aging clone %p\n", rt); 17631da177e4SLinus Torvalds return -1; 17645339ab8bSDavid S. Miller } else if (rt->rt6i_flags & RTF_GATEWAY) { 17655339ab8bSDavid S. Miller struct neighbour *neigh; 17665339ab8bSDavid S. Miller __u8 neigh_flags = 0; 17675339ab8bSDavid S. Miller 17685339ab8bSDavid S. Miller neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); 17695339ab8bSDavid S. Miller if (neigh) { 17705339ab8bSDavid S. Miller neigh_flags = neigh->flags; 17715339ab8bSDavid S. Miller neigh_release(neigh); 17725339ab8bSDavid S. 
Miller } 17738bd74516SThomas Graf if (!(neigh_flags & NTF_ROUTER)) { 17741da177e4SLinus Torvalds RT6_TRACE("purging route %p via non-router but gateway\n", 17751da177e4SLinus Torvalds rt); 17761da177e4SLinus Torvalds return -1; 17771da177e4SLinus Torvalds } 17785339ab8bSDavid S. Miller } 17793570df91SMichal Kubeček gc_args->more++; 17801da177e4SLinus Torvalds } 17811da177e4SLinus Torvalds 17821da177e4SLinus Torvalds return 0; 17831da177e4SLinus Torvalds } 17841da177e4SLinus Torvalds 17852ac3ac8fSMichal Kubeček void fib6_run_gc(unsigned long expires, struct net *net, bool force) 17861da177e4SLinus Torvalds { 17873570df91SMichal Kubeček struct fib6_gc_args gc_args; 178849a18d86SMichal Kubeček unsigned long now; 178949a18d86SMichal Kubeček 17902ac3ac8fSMichal Kubeček if (force) { 17913dc94f93SMichal Kubeček spin_lock_bh(&net->ipv6.fib6_gc_lock); 17923dc94f93SMichal Kubeček } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) { 1793417f28bbSStephen Hemminger mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); 17941da177e4SLinus Torvalds return; 17951da177e4SLinus Torvalds } 17962ac3ac8fSMichal Kubeček gc_args.timeout = expires ? 
(int)expires : 17972ac3ac8fSMichal Kubeček net->ipv6.sysctl.ip6_rt_gc_interval; 17981da177e4SLinus Torvalds 17993d0f24a7SStephen Hemminger gc_args.more = icmp6_dst_gc(); 1800f3db4851SDaniel Lezcano 18013570df91SMichal Kubeček fib6_clean_all(net, fib6_age, &gc_args); 180249a18d86SMichal Kubeček now = jiffies; 180349a18d86SMichal Kubeček net->ipv6.ip6_rt_last_gc = now; 18041da177e4SLinus Torvalds 18051da177e4SLinus Torvalds if (gc_args.more) 1806c8a45222SStephen Hemminger mod_timer(&net->ipv6.ip6_fib_timer, 180749a18d86SMichal Kubeček round_jiffies(now 1808c8a45222SStephen Hemminger + net->ipv6.sysctl.ip6_rt_gc_interval)); 1809417f28bbSStephen Hemminger else 1810417f28bbSStephen Hemminger del_timer(&net->ipv6.ip6_fib_timer); 18113dc94f93SMichal Kubeček spin_unlock_bh(&net->ipv6.fib6_gc_lock); 18121da177e4SLinus Torvalds } 18131da177e4SLinus Torvalds 18145b7c931dSDaniel Lezcano static void fib6_gc_timer_cb(unsigned long arg) 18155b7c931dSDaniel Lezcano { 18162ac3ac8fSMichal Kubeček fib6_run_gc(0, (struct net *)arg, true); 18175b7c931dSDaniel Lezcano } 18185b7c931dSDaniel Lezcano 18192c8c1e72SAlexey Dobriyan static int __net_init fib6_net_init(struct net *net) 182058f09b78SDaniel Lezcano { 182110da66f7SEric Dumazet size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; 182210da66f7SEric Dumazet 18233dc94f93SMichal Kubeček spin_lock_init(&net->ipv6.fib6_gc_lock); 18249a03cd8fSMichal Kubeček rwlock_init(&net->ipv6.fib6_walker_lock); 18259a03cd8fSMichal Kubeček INIT_LIST_HEAD(&net->ipv6.fib6_walkers); 1826417f28bbSStephen Hemminger setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 182763152fc0SDaniel Lezcano 1828c572872fSBenjamin Thery net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 1829c572872fSBenjamin Thery if (!net->ipv6.rt6_stats) 1830c572872fSBenjamin Thery goto out_timer; 1831c572872fSBenjamin Thery 183210da66f7SEric Dumazet /* Avoid false sharing : Use at least a full cache line */ 183310da66f7SEric Dumazet 
size = max_t(size_t, size, L1_CACHE_BYTES); 183410da66f7SEric Dumazet 183510da66f7SEric Dumazet net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); 183658f09b78SDaniel Lezcano if (!net->ipv6.fib_table_hash) 1837c572872fSBenjamin Thery goto out_rt6_stats; 183858f09b78SDaniel Lezcano 183958f09b78SDaniel Lezcano net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), 184058f09b78SDaniel Lezcano GFP_KERNEL); 184158f09b78SDaniel Lezcano if (!net->ipv6.fib6_main_tbl) 184258f09b78SDaniel Lezcano goto out_fib_table_hash; 184358f09b78SDaniel Lezcano 184458f09b78SDaniel Lezcano net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; 18458ed67789SDaniel Lezcano net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 184658f09b78SDaniel Lezcano net->ipv6.fib6_main_tbl->tb6_root.fn_flags = 184758f09b78SDaniel Lezcano RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 18488e773277SDavid S. Miller inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); 184958f09b78SDaniel Lezcano 185058f09b78SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES 185158f09b78SDaniel Lezcano net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), 185258f09b78SDaniel Lezcano GFP_KERNEL); 185358f09b78SDaniel Lezcano if (!net->ipv6.fib6_local_tbl) 185458f09b78SDaniel Lezcano goto out_fib6_main_tbl; 185558f09b78SDaniel Lezcano net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; 18568ed67789SDaniel Lezcano net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 185758f09b78SDaniel Lezcano net->ipv6.fib6_local_tbl->tb6_root.fn_flags = 185858f09b78SDaniel Lezcano RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 18598e773277SDavid S. 
Miller inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); 186058f09b78SDaniel Lezcano #endif 186158f09b78SDaniel Lezcano fib6_tables_init(net); 186258f09b78SDaniel Lezcano 1863417f28bbSStephen Hemminger return 0; 186458f09b78SDaniel Lezcano 186558f09b78SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES 186658f09b78SDaniel Lezcano out_fib6_main_tbl: 186758f09b78SDaniel Lezcano kfree(net->ipv6.fib6_main_tbl); 186858f09b78SDaniel Lezcano #endif 186958f09b78SDaniel Lezcano out_fib_table_hash: 187058f09b78SDaniel Lezcano kfree(net->ipv6.fib_table_hash); 1871c572872fSBenjamin Thery out_rt6_stats: 1872c572872fSBenjamin Thery kfree(net->ipv6.rt6_stats); 187363152fc0SDaniel Lezcano out_timer: 1874417f28bbSStephen Hemminger return -ENOMEM; 187558f09b78SDaniel Lezcano } 187658f09b78SDaniel Lezcano 187758f09b78SDaniel Lezcano static void fib6_net_exit(struct net *net) 187858f09b78SDaniel Lezcano { 18798ed67789SDaniel Lezcano rt6_ifdown(net, NULL); 1880417f28bbSStephen Hemminger del_timer_sync(&net->ipv6.ip6_fib_timer); 1881417f28bbSStephen Hemminger 188258f09b78SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES 18838e773277SDavid S. Miller inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); 188458f09b78SDaniel Lezcano kfree(net->ipv6.fib6_local_tbl); 188558f09b78SDaniel Lezcano #endif 18868e773277SDavid S. 
Miller inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); 188758f09b78SDaniel Lezcano kfree(net->ipv6.fib6_main_tbl); 188858f09b78SDaniel Lezcano kfree(net->ipv6.fib_table_hash); 1889c572872fSBenjamin Thery kfree(net->ipv6.rt6_stats); 189058f09b78SDaniel Lezcano } 189158f09b78SDaniel Lezcano 189258f09b78SDaniel Lezcano static struct pernet_operations fib6_net_ops = { 189358f09b78SDaniel Lezcano .init = fib6_net_init, 189458f09b78SDaniel Lezcano .exit = fib6_net_exit, 189558f09b78SDaniel Lezcano }; 189658f09b78SDaniel Lezcano 1897d63bddbeSDaniel Lezcano int __init fib6_init(void) 18981da177e4SLinus Torvalds { 1899e0b85590SDaniel Lezcano int ret = -ENOMEM; 190063152fc0SDaniel Lezcano 19011da177e4SLinus Torvalds fib6_node_kmem = kmem_cache_create("fib6_nodes", 19021da177e4SLinus Torvalds sizeof(struct fib6_node), 1903f845ab6bSDaniel Lezcano 0, SLAB_HWCACHE_ALIGN, 190420c2df83SPaul Mundt NULL); 1905f845ab6bSDaniel Lezcano if (!fib6_node_kmem) 1906e0b85590SDaniel Lezcano goto out; 1907e0b85590SDaniel Lezcano 190858f09b78SDaniel Lezcano ret = register_pernet_subsys(&fib6_net_ops); 190958f09b78SDaniel Lezcano if (ret) 1910c572872fSBenjamin Thery goto out_kmem_cache_create; 1911e8803b6cSDavid S. Miller 1912e8803b6cSDavid S. Miller ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, 1913e8803b6cSDavid S. Miller NULL); 1914e8803b6cSDavid S. Miller if (ret) 1915e8803b6cSDavid S. Miller goto out_unregister_subsys; 1916705f1c86SHannes Frederic Sowa 1917705f1c86SHannes Frederic Sowa __fib6_flush_trees = fib6_flush_trees; 1918d63bddbeSDaniel Lezcano out: 1919d63bddbeSDaniel Lezcano return ret; 1920d63bddbeSDaniel Lezcano 1921e8803b6cSDavid S. Miller out_unregister_subsys: 1922e8803b6cSDavid S. 
Miller unregister_pernet_subsys(&fib6_net_ops); 1923d63bddbeSDaniel Lezcano out_kmem_cache_create: 1924d63bddbeSDaniel Lezcano kmem_cache_destroy(fib6_node_kmem); 1925d63bddbeSDaniel Lezcano goto out; 19261da177e4SLinus Torvalds } 19271da177e4SLinus Torvalds 19281da177e4SLinus Torvalds void fib6_gc_cleanup(void) 19291da177e4SLinus Torvalds { 193058f09b78SDaniel Lezcano unregister_pernet_subsys(&fib6_net_ops); 19311da177e4SLinus Torvalds kmem_cache_destroy(fib6_node_kmem); 19321da177e4SLinus Torvalds } 19338d2ca1d7SHannes Frederic Sowa 19348d2ca1d7SHannes Frederic Sowa #ifdef CONFIG_PROC_FS 19358d2ca1d7SHannes Frederic Sowa 19368d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter { 19378d2ca1d7SHannes Frederic Sowa struct seq_net_private p; 193894b2cfe0SHannes Frederic Sowa struct fib6_walker w; 19398d2ca1d7SHannes Frederic Sowa loff_t skip; 19408d2ca1d7SHannes Frederic Sowa struct fib6_table *tbl; 194142b18706SHannes Frederic Sowa int sernum; 19428d2ca1d7SHannes Frederic Sowa }; 19438d2ca1d7SHannes Frederic Sowa 19448d2ca1d7SHannes Frederic Sowa static int ipv6_route_seq_show(struct seq_file *seq, void *v) 19458d2ca1d7SHannes Frederic Sowa { 19468d2ca1d7SHannes Frederic Sowa struct rt6_info *rt = v; 19478d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter *iter = seq->private; 19488d2ca1d7SHannes Frederic Sowa 19498d2ca1d7SHannes Frederic Sowa seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 19508d2ca1d7SHannes Frederic Sowa 19518d2ca1d7SHannes Frederic Sowa #ifdef CONFIG_IPV6_SUBTREES 19528d2ca1d7SHannes Frederic Sowa seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 19538d2ca1d7SHannes Frederic Sowa #else 19548d2ca1d7SHannes Frederic Sowa seq_puts(seq, "00000000000000000000000000000000 00 "); 19558d2ca1d7SHannes Frederic Sowa #endif 19568d2ca1d7SHannes Frederic Sowa if (rt->rt6i_flags & RTF_GATEWAY) 19578d2ca1d7SHannes Frederic Sowa seq_printf(seq, "%pi6", &rt->rt6i_gateway); 19588d2ca1d7SHannes Frederic Sowa else 
19598d2ca1d7SHannes Frederic Sowa seq_puts(seq, "00000000000000000000000000000000"); 19608d2ca1d7SHannes Frederic Sowa 19618d2ca1d7SHannes Frederic Sowa seq_printf(seq, " %08x %08x %08x %08x %8s\n", 19628d2ca1d7SHannes Frederic Sowa rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 19638d2ca1d7SHannes Frederic Sowa rt->dst.__use, rt->rt6i_flags, 19648d2ca1d7SHannes Frederic Sowa rt->dst.dev ? rt->dst.dev->name : ""); 19658d2ca1d7SHannes Frederic Sowa iter->w.leaf = NULL; 19668d2ca1d7SHannes Frederic Sowa return 0; 19678d2ca1d7SHannes Frederic Sowa } 19688d2ca1d7SHannes Frederic Sowa 196994b2cfe0SHannes Frederic Sowa static int ipv6_route_yield(struct fib6_walker *w) 19708d2ca1d7SHannes Frederic Sowa { 19718d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter *iter = w->args; 19728d2ca1d7SHannes Frederic Sowa 19738d2ca1d7SHannes Frederic Sowa if (!iter->skip) 19748d2ca1d7SHannes Frederic Sowa return 1; 19758d2ca1d7SHannes Frederic Sowa 19768d2ca1d7SHannes Frederic Sowa do { 19778d2ca1d7SHannes Frederic Sowa iter->w.leaf = iter->w.leaf->dst.rt6_next; 19788d2ca1d7SHannes Frederic Sowa iter->skip--; 19798d2ca1d7SHannes Frederic Sowa if (!iter->skip && iter->w.leaf) 19808d2ca1d7SHannes Frederic Sowa return 1; 19818d2ca1d7SHannes Frederic Sowa } while (iter->w.leaf); 19828d2ca1d7SHannes Frederic Sowa 19838d2ca1d7SHannes Frederic Sowa return 0; 19848d2ca1d7SHannes Frederic Sowa } 19858d2ca1d7SHannes Frederic Sowa 19869a03cd8fSMichal Kubeček static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, 19879a03cd8fSMichal Kubeček struct net *net) 19888d2ca1d7SHannes Frederic Sowa { 19898d2ca1d7SHannes Frederic Sowa memset(&iter->w, 0, sizeof(iter->w)); 19908d2ca1d7SHannes Frederic Sowa iter->w.func = ipv6_route_yield; 19918d2ca1d7SHannes Frederic Sowa iter->w.root = &iter->tbl->tb6_root; 19928d2ca1d7SHannes Frederic Sowa iter->w.state = FWS_INIT; 19938d2ca1d7SHannes Frederic Sowa iter->w.node = iter->w.root; 19948d2ca1d7SHannes Frederic Sowa iter->w.args = iter; 
19950a67d3efSHannes Frederic Sowa iter->sernum = iter->w.root->fn_sernum;
19968d2ca1d7SHannes Frederic Sowa INIT_LIST_HEAD(&iter->w.lh);
19979a03cd8fSMichal Kubeček fib6_walker_link(net, &iter->w);
19988d2ca1d7SHannes Frederic Sowa }
19998d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_seq_next_table - find the table to iterate after @tbl.
 * @tbl: table that was just finished, or NULL to start from the beginning
 * @net: namespace whose ipv6.fib_table_hash[] buckets are scanned
 *
 * Returns the next struct fib6_table, or NULL when no further table is
 * found. The list heads are read with rcu_dereference_bh(); the visible
 * caller ipv6_route_seq_start() takes rcu_read_lock_bh() before calling.
 */
20008d2ca1d7SHannes Frederic Sowa static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
20018d2ca1d7SHannes Frederic Sowa struct net *net)
20028d2ca1d7SHannes Frederic Sowa {
20038d2ca1d7SHannes Frederic Sowa unsigned int h;
20048d2ca1d7SHannes Frederic Sowa struct hlist_node *node;
20058d2ca1d7SHannes Frederic Sowa
20068d2ca1d7SHannes Frederic Sowa if (tbl) {
/*
 * Resume after @tbl: first try the next entry on its own chain; h is set to
 * the bucket following the one derived from tb6_id.
 * NOTE(review): the mask assumes FIB6_TABLE_HASHSZ is a power of two - confirm.
 */
20078d2ca1d7SHannes Frederic Sowa h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
20088d2ca1d7SHannes Frederic Sowa node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
20098d2ca1d7SHannes Frederic Sowa } else {
20108d2ca1d7SHannes Frederic Sowa h = 0;
20118d2ca1d7SHannes Frederic Sowa node = NULL;
20128d2ca1d7SHannes Frederic Sowa }
20138d2ca1d7SHannes Frederic Sowa
/* No node yet: take the first entry of each remaining bucket until one is non-NULL. */
20148d2ca1d7SHannes Frederic Sowa while (!node && h < FIB6_TABLE_HASHSZ) {
20158d2ca1d7SHannes Frederic Sowa node = rcu_dereference_bh(
20168d2ca1d7SHannes Frederic Sowa hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
20178d2ca1d7SHannes Frederic Sowa }
20188d2ca1d7SHannes Frederic Sowa return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
20198d2ca1d7SHannes Frederic Sowa }
20208d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_check_sernum - restart the walker if the root's fn_sernum changed.
 *
 * When the root's fn_sernum differs from the value cached in iter->sernum,
 * the cache is refreshed and the walker is reset to the root in state
 * FWS_INIT, with w.skip loaded from w.count. A non-zero w.skip at that point
 * triggers WARN_ON().
 * NOTE(review): presumably w.count is the number of entries already visited,
 * so the restarted walk skips them - confirm against fib6_walk_continue().
 */
20210a67d3efSHannes Frederic Sowa static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
20220a67d3efSHannes Frederic Sowa {
20230a67d3efSHannes Frederic Sowa if (iter->sernum != iter->w.root->fn_sernum) {
20240a67d3efSHannes Frederic Sowa iter->sernum = iter->w.root->fn_sernum;
20250a67d3efSHannes Frederic Sowa iter->w.state = FWS_INIT;
20260a67d3efSHannes Frederic Sowa iter->w.node = iter->w.root;
20270a67d3efSHannes Frederic Sowa WARN_ON(iter->w.skip);
20280a67d3efSHannes Frederic Sowa iter->w.skip = iter->w.count;
20290a67d3efSHannes Frederic Sowa }
20300a67d3efSHannes Frederic Sowa }
20310a67d3efSHannes Frederic Sowa
/*
 * ipv6_route_seq_next - seq_file ->next handler.
 * @seq: seq_file whose ->private is the struct ipv6_route_iter
 * @v:   entry returned previously, or NULL when called from
 *       ipv6_route_seq_start() to fetch the first entry
 * @pos: position counter; incremented only when @v is non-NULL and an entry
 *       is returned
 *
 * Returns the next struct rt6_info, or NULL when fib6_walk_continue() returns
 * a negative value or when no further table is found.
 * NOTE(review): *pos is left unchanged on the NULL-return paths - confirm this
 * matches the seq_file contract of the kernel version this file targets.
 */
20328d2ca1d7SHannes Frederic Sowa static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
20338d2ca1d7SHannes Frederic Sowa {
20348d2ca1d7SHannes Frederic Sowa int r;
20358d2ca1d7SHannes Frederic Sowa struct rt6_info *n;
20368d2ca1d7SHannes Frederic Sowa struct net *net = seq_file_net(seq);
20378d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter *iter = seq->private;
20388d2ca1d7SHannes Frederic Sowa
20398d2ca1d7SHannes Frederic Sowa if (!v)
20408d2ca1d7SHannes Frederic Sowa goto iter_table;
20418d2ca1d7SHannes Frederic Sowa
/* Follow the dst.rt6_next chain of the current entry before touching the walker. */
20428d2ca1d7SHannes Frederic Sowa n = ((struct rt6_info *)v)->dst.rt6_next;
20438d2ca1d7SHannes Frederic Sowa if (n) {
20448d2ca1d7SHannes Frederic Sowa ++*pos;
20458d2ca1d7SHannes Frederic Sowa return n;
20468d2ca1d7SHannes Frederic Sowa }
20478d2ca1d7SHannes Frederic Sowa
20488d2ca1d7SHannes Frederic Sowa iter_table:
/* Chain exhausted (or first call): advance the walker under the table's read lock. */
20490a67d3efSHannes Frederic Sowa ipv6_route_check_sernum(iter);
20508d2ca1d7SHannes Frederic Sowa read_lock(&iter->tbl->tb6_lock);
20518d2ca1d7SHannes Frederic Sowa r = fib6_walk_continue(&iter->w);
20528d2ca1d7SHannes Frederic Sowa read_unlock(&iter->tbl->tb6_lock);
20538d2ca1d7SHannes Frederic Sowa if (r > 0) {
20548d2ca1d7SHannes Frederic Sowa if (v)
20558d2ca1d7SHannes Frederic Sowa ++*pos;
20568d2ca1d7SHannes Frederic Sowa return iter->w.leaf;
20578d2ca1d7SHannes Frederic Sowa } else if (r < 0) {
20589a03cd8fSMichal Kubeček fib6_walker_unlink(net, &iter->w);
20598d2ca1d7SHannes Frederic Sowa return NULL;
20608d2ca1d7SHannes Frederic Sowa }
/* r == 0: unlink the walker from this table and move on to the next one. */
20619a03cd8fSMichal Kubeček fib6_walker_unlink(net, &iter->w);
20628d2ca1d7SHannes Frederic Sowa
20638d2ca1d7SHannes Frederic Sowa iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
20648d2ca1d7SHannes Frederic Sowa if (!iter->tbl)
20658d2ca1d7SHannes Frederic Sowa return NULL;
20668d2ca1d7SHannes Frederic Sowa
20679a03cd8fSMichal Kubeček ipv6_route_seq_setup_walk(iter, net);
20688d2ca1d7SHannes Frederic Sowa goto iter_table;
20698d2ca1d7SHannes Frederic Sowa }
20708d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_seq_start - seq_file ->start handler.
 *
 * Takes rcu_read_lock_bh() (dropped in ipv6_route_seq_stop()), selects the
 * first table, stores *pos in iter->skip, and returns the first entry through
 * ipv6_route_seq_next() with v == NULL. Returns NULL when there is no table.
 */
20718d2ca1d7SHannes Frederic Sowa static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
20728d2ca1d7SHannes Frederic Sowa __acquires(RCU_BH)
20738d2ca1d7SHannes Frederic Sowa {
20748d2ca1d7SHannes Frederic Sowa struct net *net = seq_file_net(seq);
20758d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter *iter = seq->private;
20768d2ca1d7SHannes Frederic Sowa
20778d2ca1d7SHannes Frederic Sowa rcu_read_lock_bh();
20788d2ca1d7SHannes Frederic Sowa iter->tbl = ipv6_route_seq_next_table(NULL, net);
20798d2ca1d7SHannes Frederic Sowa iter->skip = *pos;
20808d2ca1d7SHannes Frederic Sowa
20818d2ca1d7SHannes Frederic Sowa if (iter->tbl) {
20829a03cd8fSMichal Kubeček ipv6_route_seq_setup_walk(iter, net);
20838d2ca1d7SHannes Frederic Sowa return ipv6_route_seq_next(seq, NULL, pos);
20848d2ca1d7SHannes Frederic Sowa } else {
20858d2ca1d7SHannes Frederic Sowa return NULL;
20868d2ca1d7SHannes Frederic Sowa }
20878d2ca1d7SHannes Frederic Sowa }
20888d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_iter_active - true when the walker has a current node and is not
 * in state FWS_U at the root. ipv6_route_seq_stop() uses it to decide whether
 * the walker must be unlinked.
 * NOTE(review): presumably FWS_U at the root means the walk has completed -
 * confirm against fib6_walk_continue().
 */
20898d2ca1d7SHannes Frederic Sowa static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
20908d2ca1d7SHannes Frederic Sowa {
209194b2cfe0SHannes Frederic Sowa struct fib6_walker *w = &iter->w;
20928d2ca1d7SHannes Frederic Sowa return w->node && !(w->state == FWS_U && w->node == w->root);
20938d2ca1d7SHannes Frederic Sowa }
20948d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_seq_stop - seq_file ->stop handler.
 *
 * Unlinks the walker when ipv6_route_iter_active() reports it active, then
 * drops the RCU-bh read lock taken in ipv6_route_seq_start(). @v is unused.
 */
20958d2ca1d7SHannes Frederic Sowa static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
20968d2ca1d7SHannes Frederic Sowa __releases(RCU_BH)
20978d2ca1d7SHannes Frederic Sowa {
20989a03cd8fSMichal Kubeček struct net *net = seq_file_net(seq);
20998d2ca1d7SHannes Frederic Sowa struct ipv6_route_iter *iter = seq->private;
21008d2ca1d7SHannes Frederic Sowa
21018d2ca1d7SHannes Frederic Sowa if (ipv6_route_iter_active(iter))
21029a03cd8fSMichal Kubeček fib6_walker_unlink(net, &iter->w);
21038d2ca1d7SHannes Frederic Sowa
21048d2ca1d7SHannes Frederic Sowa rcu_read_unlock_bh();
21058d2ca1d7SHannes Frederic Sowa }
21068d2ca1d7SHannes Frederic Sowa
/* seq_file operations table; ipv6_route_seq_show is defined elsewhere. */
21078d2ca1d7SHannes Frederic Sowa static const struct seq_operations ipv6_route_seq_ops = {
21088d2ca1d7SHannes Frederic Sowa .start = ipv6_route_seq_start,
21098d2ca1d7SHannes Frederic Sowa .next = ipv6_route_seq_next,
21108d2ca1d7SHannes Frederic Sowa .stop = ipv6_route_seq_stop,
21118d2ca1d7SHannes Frederic Sowa .show = ipv6_route_seq_show
21128d2ca1d7SHannes Frederic Sowa };
21138d2ca1d7SHannes Frederic Sowa
/*
 * ipv6_route_open - open handler: calls seq_open_net() with
 * ipv6_route_seq_ops and a private area sized for struct ipv6_route_iter,
 * and returns its result.
 */
21148d2ca1d7SHannes Frederic Sowa int ipv6_route_open(struct inode *inode, struct file *file)
21158d2ca1d7SHannes Frederic Sowa {
21168d2ca1d7SHannes Frederic Sowa return seq_open_net(inode, file, &ipv6_route_seq_ops,
21178d2ca1d7SHannes Frederic Sowa sizeof(struct ipv6_route_iter));
21188d2ca1d7SHannes Frederic Sowa }
21198d2ca1d7SHannes Frederic Sowa
21208d2ca1d7SHannes Frederic Sowa #endif /* CONFIG_PROC_FS */
2121