xref: /openbmc/linux/include/net/dst.h (revision 1c31720a74e19bb57f301350a3b03210fa2ba9e3)
/*
 * net/dst.h	Protocol independent destination cache definitions.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */
71da177e4SLinus Torvalds 
81da177e4SLinus Torvalds #ifndef _NET_DST_H
91da177e4SLinus Torvalds #define _NET_DST_H
101da177e4SLinus Torvalds 
1186393e52SAlexey Dobriyan #include <net/dst_ops.h>
1214c85021SArnaldo Carvalho de Melo #include <linux/netdevice.h>
131da177e4SLinus Torvalds #include <linux/rtnetlink.h>
141da177e4SLinus Torvalds #include <linux/rcupdate.h>
151da177e4SLinus Torvalds #include <linux/jiffies.h>
161da177e4SLinus Torvalds #include <net/neighbour.h>
171da177e4SLinus Torvalds #include <asm/processor.h>
181da177e4SLinus Torvalds 
/*
 * Route cache debug level:
 * 0 - no debugging messages (the level selected below)
 * 1 - rare events and bugs
 * 2 - trace mode.
 */
#define RT_CACHE_DEBUG		0

/*
 * Intervals expressed as multiples/fractions of HZ.
 * NOTE(review): presumably the minimum, increment and maximum period of
 * the dst garbage collector -- confirm against the users of these macros.
 */
#define DST_GC_MIN	(HZ/10)
#define DST_GC_INC	(HZ/2)
#define DST_GC_MAX	(120*HZ)
291da177e4SLinus Torvalds 
301da177e4SLinus Torvalds /* Each dst_entry has reference count and sits in some parent list(s).
311da177e4SLinus Torvalds  * When it is removed from parent list, it is "freed" (dst_free).
321da177e4SLinus Torvalds  * After this it enters dead state (dst->obsolete > 0) and if its refcnt
331da177e4SLinus Torvalds  * is zero, it can be destroyed immediately, otherwise it is added
341da177e4SLinus Torvalds  * to gc list and garbage collector periodically checks the refcnt.
351da177e4SLinus Torvalds  */
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds struct sk_buff;
381da177e4SLinus Torvalds 
39fd2c3ef7SEric Dumazet struct dst_entry {
401e19e02cSEric Dumazet 	struct rcu_head		rcu_head;
411da177e4SLinus Torvalds 	struct dst_entry	*child;
421da177e4SLinus Torvalds 	struct net_device       *dev;
43c4d54110SHerbert Xu 	short			error;
44c4d54110SHerbert Xu 	short			obsolete;
451da177e4SLinus Torvalds 	int			flags;
46c7d4426aSEric Dumazet #define DST_HOST		0x0001
47c7d4426aSEric Dumazet #define DST_NOXFRM		0x0002
48c7d4426aSEric Dumazet #define DST_NOPOLICY		0x0004
49c7d4426aSEric Dumazet #define DST_NOHASH		0x0008
50c7d4426aSEric Dumazet #define DST_NOCACHE		0x0010
511da177e4SLinus Torvalds 	unsigned long		expires;
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds 	unsigned short		header_len;	/* more space at head required */
541da177e4SLinus Torvalds 	unsigned short		trailer_len;	/* space to reserve at tail */
551da177e4SLinus Torvalds 
5669a73829SEric Dumazet 	unsigned int		rate_tokens;
57f1dd9c37SZhang Yanmin 	unsigned long		rate_last;	/* rate limiting for ICMP */
5869a73829SEric Dumazet 
59f1dd9c37SZhang Yanmin 	struct dst_entry	*path;
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds 	struct neighbour	*neighbour;
621da177e4SLinus Torvalds 	struct hh_cache		*hh;
63def8b4faSAlexey Dobriyan #ifdef CONFIG_XFRM
641da177e4SLinus Torvalds 	struct xfrm_state	*xfrm;
655635c10dSEric Dumazet #else
665635c10dSEric Dumazet 	void			*__pad1;
67def8b4faSAlexey Dobriyan #endif
681da177e4SLinus Torvalds 	int			(*input)(struct sk_buff*);
691da177e4SLinus Torvalds 	int			(*output)(struct sk_buff*);
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds 	struct  dst_ops	        *ops;
721da177e4SLinus Torvalds 
73f1dd9c37SZhang Yanmin 	u32			metrics[RTAX_MAX];
74f1dd9c37SZhang Yanmin 
75f1dd9c37SZhang Yanmin #ifdef CONFIG_NET_CLS_ROUTE
76f1dd9c37SZhang Yanmin 	__u32			tclassid;
775635c10dSEric Dumazet #else
785635c10dSEric Dumazet 	__u32			__pad2;
79f1dd9c37SZhang Yanmin #endif
80f1dd9c37SZhang Yanmin 
815635c10dSEric Dumazet 
825635c10dSEric Dumazet 	/*
835635c10dSEric Dumazet 	 * Align __refcnt to a 64 bytes alignment
845635c10dSEric Dumazet 	 * (L1_CACHE_SIZE would be too much)
855635c10dSEric Dumazet 	 */
865635c10dSEric Dumazet #ifdef CONFIG_64BIT
875635c10dSEric Dumazet 	long			__pad_to_align_refcnt[1];
885635c10dSEric Dumazet #endif
89f1dd9c37SZhang Yanmin 	/*
90f1dd9c37SZhang Yanmin 	 * __refcnt wants to be on a different cache line from
91f1dd9c37SZhang Yanmin 	 * input/output/ops or performance tanks badly
92f1dd9c37SZhang Yanmin 	 */
931e19e02cSEric Dumazet 	atomic_t		__refcnt;	/* client references	*/
941e19e02cSEric Dumazet 	int			__use;
95f1dd9c37SZhang Yanmin 	unsigned long		lastuse;
961e19e02cSEric Dumazet 	union {
971e19e02cSEric Dumazet 		struct dst_entry *next;
98*1c31720aSEric Dumazet 		struct rtable __rcu *rt_next;
991e19e02cSEric Dumazet 		struct rt6_info   *rt6_next;
1001e19e02cSEric Dumazet 		struct dn_route  *dn_next;
1011e19e02cSEric Dumazet 	};
1021da177e4SLinus Torvalds };
1031da177e4SLinus Torvalds 
1041da177e4SLinus Torvalds #ifdef __KERNEL__
1051da177e4SLinus Torvalds 
1061da177e4SLinus Torvalds static inline u32
1071da177e4SLinus Torvalds dst_metric(const struct dst_entry *dst, int metric)
1081da177e4SLinus Torvalds {
1091da177e4SLinus Torvalds 	return dst->metrics[metric-1];
1101da177e4SLinus Torvalds }
1111da177e4SLinus Torvalds 
1120c3adfb8SGilad Ben-Yossef static inline u32
1130c3adfb8SGilad Ben-Yossef dst_feature(const struct dst_entry *dst, u32 feature)
1140c3adfb8SGilad Ben-Yossef {
115bb5b7c11SDavid S. Miller 	return dst_metric(dst, RTAX_FEATURES) & feature;
1160c3adfb8SGilad Ben-Yossef }
1170c3adfb8SGilad Ben-Yossef 
1181da177e4SLinus Torvalds static inline u32 dst_mtu(const struct dst_entry *dst)
1191da177e4SLinus Torvalds {
1201da177e4SLinus Torvalds 	u32 mtu = dst_metric(dst, RTAX_MTU);
1211da177e4SLinus Torvalds 	/*
1221da177e4SLinus Torvalds 	 * Alexey put it here, so ask him about it :)
1231da177e4SLinus Torvalds 	 */
1241da177e4SLinus Torvalds 	barrier();
1251da177e4SLinus Torvalds 	return mtu;
1261da177e4SLinus Torvalds }
1271da177e4SLinus Torvalds 
/*
 * RTT metrics are stored in milliseconds for user ABI, but used as jiffies.
 *
 * dst_metric_rtt - read metric @metric of @dst and convert it from
 * milliseconds to jiffies.
 */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
	return msecs_to_jiffies(dst_metric(dst, metric));
}
133c1e20f7cSStephen Hemminger 
134c1e20f7cSStephen Hemminger static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
135c1e20f7cSStephen Hemminger 				      unsigned long rtt)
136c1e20f7cSStephen Hemminger {
137c1e20f7cSStephen Hemminger 	dst->metrics[metric-1] = jiffies_to_msecs(rtt);
138c1e20f7cSStephen Hemminger }
139c1e20f7cSStephen Hemminger 
1401da177e4SLinus Torvalds static inline u32
1411da177e4SLinus Torvalds dst_allfrag(const struct dst_entry *dst)
1421da177e4SLinus Torvalds {
1430c3adfb8SGilad Ben-Yossef 	int ret = dst_feature(dst,  RTAX_FEATURE_ALLFRAG);
1441da177e4SLinus Torvalds 	/* Yes, _exactly_. This is paranoia. */
1451da177e4SLinus Torvalds 	barrier();
1461da177e4SLinus Torvalds 	return ret;
1471da177e4SLinus Torvalds }
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds static inline int
1501da177e4SLinus Torvalds dst_metric_locked(struct dst_entry *dst, int metric)
1511da177e4SLinus Torvalds {
1521da177e4SLinus Torvalds 	return dst_metric(dst, RTAX_LOCK) & (1<<metric);
1531da177e4SLinus Torvalds }
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds static inline void dst_hold(struct dst_entry * dst)
1561da177e4SLinus Torvalds {
1575635c10dSEric Dumazet 	/*
1585635c10dSEric Dumazet 	 * If your kernel compilation stops here, please check
1595635c10dSEric Dumazet 	 * __pad_to_align_refcnt declaration in struct dst_entry
1605635c10dSEric Dumazet 	 */
1615635c10dSEric Dumazet 	BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
1621da177e4SLinus Torvalds 	atomic_inc(&dst->__refcnt);
1631da177e4SLinus Torvalds }
1641da177e4SLinus Torvalds 
16503f49f34SPavel Emelyanov static inline void dst_use(struct dst_entry *dst, unsigned long time)
16603f49f34SPavel Emelyanov {
16703f49f34SPavel Emelyanov 	dst_hold(dst);
16803f49f34SPavel Emelyanov 	dst->__use++;
16903f49f34SPavel Emelyanov 	dst->lastuse = time;
17003f49f34SPavel Emelyanov }
17103f49f34SPavel Emelyanov 
1727fee226aSEric Dumazet static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
1737fee226aSEric Dumazet {
1747fee226aSEric Dumazet 	dst->__use++;
1757fee226aSEric Dumazet 	dst->lastuse = time;
1767fee226aSEric Dumazet }
1777fee226aSEric Dumazet 
1781da177e4SLinus Torvalds static inline
1791da177e4SLinus Torvalds struct dst_entry * dst_clone(struct dst_entry * dst)
1801da177e4SLinus Torvalds {
1811da177e4SLinus Torvalds 	if (dst)
1821da177e4SLinus Torvalds 		atomic_inc(&dst->__refcnt);
1831da177e4SLinus Torvalds 	return dst;
1841da177e4SLinus Torvalds }
1851da177e4SLinus Torvalds 
1868d330868SIlpo Järvinen extern void dst_release(struct dst_entry *dst);
1877fee226aSEric Dumazet 
1887fee226aSEric Dumazet static inline void refdst_drop(unsigned long refdst)
1897fee226aSEric Dumazet {
1907fee226aSEric Dumazet 	if (!(refdst & SKB_DST_NOREF))
1917fee226aSEric Dumazet 		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
1927fee226aSEric Dumazet }
1937fee226aSEric Dumazet 
1947fee226aSEric Dumazet /**
1957fee226aSEric Dumazet  * skb_dst_drop - drops skb dst
1967fee226aSEric Dumazet  * @skb: buffer
1977fee226aSEric Dumazet  *
1987fee226aSEric Dumazet  * Drops dst reference count if a reference was taken.
1997fee226aSEric Dumazet  */
200adf30907SEric Dumazet static inline void skb_dst_drop(struct sk_buff *skb)
201adf30907SEric Dumazet {
2027fee226aSEric Dumazet 	if (skb->_skb_refdst) {
2037fee226aSEric Dumazet 		refdst_drop(skb->_skb_refdst);
2047fee226aSEric Dumazet 		skb->_skb_refdst = 0UL;
2057fee226aSEric Dumazet 	}
2067fee226aSEric Dumazet }
2077fee226aSEric Dumazet 
2087fee226aSEric Dumazet static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
2097fee226aSEric Dumazet {
2107fee226aSEric Dumazet 	nskb->_skb_refdst = oskb->_skb_refdst;
2117fee226aSEric Dumazet 	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
2127fee226aSEric Dumazet 		dst_clone(skb_dst(nskb));
2137fee226aSEric Dumazet }
2147fee226aSEric Dumazet 
2157fee226aSEric Dumazet /**
2167fee226aSEric Dumazet  * skb_dst_force - makes sure skb dst is refcounted
2177fee226aSEric Dumazet  * @skb: buffer
2187fee226aSEric Dumazet  *
2197fee226aSEric Dumazet  * If dst is not yet refcounted, let's do it
2207fee226aSEric Dumazet  */
2217fee226aSEric Dumazet static inline void skb_dst_force(struct sk_buff *skb)
2227fee226aSEric Dumazet {
2237fee226aSEric Dumazet 	if (skb_dst_is_noref(skb)) {
2247fee226aSEric Dumazet 		WARN_ON(!rcu_read_lock_held());
2257fee226aSEric Dumazet 		skb->_skb_refdst &= ~SKB_DST_NOREF;
2267fee226aSEric Dumazet 		dst_clone(skb_dst(skb));
2277fee226aSEric Dumazet 	}
228adf30907SEric Dumazet }
2291da177e4SLinus Torvalds 
230d19d56ddSEric Dumazet 
231d19d56ddSEric Dumazet /**
232290b895eSEric Dumazet  *	__skb_tunnel_rx - prepare skb for rx reinsert
233290b895eSEric Dumazet  *	@skb: buffer
234290b895eSEric Dumazet  *	@dev: tunnel device
235290b895eSEric Dumazet  *
236290b895eSEric Dumazet  *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
237290b895eSEric Dumazet  *	so make some cleanups. (no accounting done)
238290b895eSEric Dumazet  */
239290b895eSEric Dumazet static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
240290b895eSEric Dumazet {
241290b895eSEric Dumazet 	skb->dev = dev;
242290b895eSEric Dumazet 	skb->rxhash = 0;
243290b895eSEric Dumazet 	skb_set_queue_mapping(skb, 0);
244290b895eSEric Dumazet 	skb_dst_drop(skb);
245290b895eSEric Dumazet 	nf_reset(skb);
246290b895eSEric Dumazet }
247290b895eSEric Dumazet 
248290b895eSEric Dumazet /**
249d19d56ddSEric Dumazet  *	skb_tunnel_rx - prepare skb for rx reinsert
250d19d56ddSEric Dumazet  *	@skb: buffer
251d19d56ddSEric Dumazet  *	@dev: tunnel device
252d19d56ddSEric Dumazet  *
253d19d56ddSEric Dumazet  *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
254d19d56ddSEric Dumazet  *	so make some cleanups, and perform accounting.
255290b895eSEric Dumazet  *	Note: this accounting is not SMP safe.
256d19d56ddSEric Dumazet  */
257d19d56ddSEric Dumazet static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
258d19d56ddSEric Dumazet {
259d19d56ddSEric Dumazet 	/* TODO : stats should be SMP safe */
260d19d56ddSEric Dumazet 	dev->stats.rx_packets++;
261d19d56ddSEric Dumazet 	dev->stats.rx_bytes += skb->len;
262290b895eSEric Dumazet 	__skb_tunnel_rx(skb, dev);
263d19d56ddSEric Dumazet }
264d19d56ddSEric Dumazet 
2651da177e4SLinus Torvalds /* Children define the path of the packet through the
2661da177e4SLinus Torvalds  * Linux networking.  Thus, destinations are stackable.
2671da177e4SLinus Torvalds  */
2681da177e4SLinus Torvalds 
2698764ab2cSSteffen Klassert static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
2701da177e4SLinus Torvalds {
2718764ab2cSSteffen Klassert 	struct dst_entry *child = skb_dst(skb)->child;
2721da177e4SLinus Torvalds 
2738764ab2cSSteffen Klassert 	skb_dst_drop(skb);
2741da177e4SLinus Torvalds 	return child;
2751da177e4SLinus Torvalds }
2761da177e4SLinus Torvalds 
/* Out-of-line dst helpers; their definitions are not in this header. */
extern int dst_discard(struct sk_buff *skb);
extern void *dst_alloc(struct dst_ops *ops);
extern void __dst_free(struct dst_entry *dst);
extern struct dst_entry *dst_destroy(struct dst_entry *dst);
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds static inline void dst_free(struct dst_entry * dst)
2831da177e4SLinus Torvalds {
2841da177e4SLinus Torvalds 	if (dst->obsolete > 1)
2851da177e4SLinus Torvalds 		return;
2861da177e4SLinus Torvalds 	if (!atomic_read(&dst->__refcnt)) {
2871da177e4SLinus Torvalds 		dst = dst_destroy(dst);
2881da177e4SLinus Torvalds 		if (!dst)
2891da177e4SLinus Torvalds 			return;
2901da177e4SLinus Torvalds 	}
2911da177e4SLinus Torvalds 	__dst_free(dst);
2921da177e4SLinus Torvalds }
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds static inline void dst_rcu_free(struct rcu_head *head)
2951da177e4SLinus Torvalds {
2961da177e4SLinus Torvalds 	struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
2971da177e4SLinus Torvalds 	dst_free(dst);
2981da177e4SLinus Torvalds }
2991da177e4SLinus Torvalds 
3001da177e4SLinus Torvalds static inline void dst_confirm(struct dst_entry *dst)
3011da177e4SLinus Torvalds {
3021da177e4SLinus Torvalds 	if (dst)
3031da177e4SLinus Torvalds 		neigh_confirm(dst->neighbour);
3041da177e4SLinus Torvalds }
3051da177e4SLinus Torvalds 
3061da177e4SLinus Torvalds static inline void dst_link_failure(struct sk_buff *skb)
3071da177e4SLinus Torvalds {
308adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3091da177e4SLinus Torvalds 	if (dst && dst->ops && dst->ops->link_failure)
3101da177e4SLinus Torvalds 		dst->ops->link_failure(skb);
3111da177e4SLinus Torvalds }
3121da177e4SLinus Torvalds 
3131da177e4SLinus Torvalds static inline void dst_set_expires(struct dst_entry *dst, int timeout)
3141da177e4SLinus Torvalds {
3151da177e4SLinus Torvalds 	unsigned long expires = jiffies + timeout;
3161da177e4SLinus Torvalds 
3171da177e4SLinus Torvalds 	if (expires == 0)
3181da177e4SLinus Torvalds 		expires = 1;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 	if (dst->expires == 0 || time_before(expires, dst->expires))
3211da177e4SLinus Torvalds 		dst->expires = expires;
3221da177e4SLinus Torvalds }
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds /* Output packet to network from transport.  */
3251da177e4SLinus Torvalds static inline int dst_output(struct sk_buff *skb)
3261da177e4SLinus Torvalds {
327adf30907SEric Dumazet 	return skb_dst(skb)->output(skb);
3281da177e4SLinus Torvalds }
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds /* Input packet from network to transport.  */
3311da177e4SLinus Torvalds static inline int dst_input(struct sk_buff *skb)
3321da177e4SLinus Torvalds {
333adf30907SEric Dumazet 	return skb_dst(skb)->input(skb);
3341da177e4SLinus Torvalds }
3351da177e4SLinus Torvalds 
3361da177e4SLinus Torvalds static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
3371da177e4SLinus Torvalds {
3381da177e4SLinus Torvalds 	if (dst->obsolete)
3391da177e4SLinus Torvalds 		dst = dst->ops->check(dst, cookie);
3401da177e4SLinus Torvalds 	return dst;
3411da177e4SLinus Torvalds }
3421da177e4SLinus Torvalds 
extern void		dst_init(void);
3441da177e4SLinus Torvalds 
/* Flags for xfrm_lookup flags argument (bit values, may be OR-ed). */
enum {
	XFRM_LOOKUP_WAIT = 1 << 0,
	XFRM_LOOKUP_ICMP = 1 << 1,
};
350815f4e57SHerbert Xu 
/* Forward declarations: only pointers to these types are used below. */
struct flowi;
struct net;
struct sock;
struct dst_entry;
#ifndef CONFIG_XFRM
/*
 * Without CONFIG_XFRM both lookups are stubs: they ignore every argument,
 * leave *dst_p untouched and return 0.
 */
static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
			      struct flowi *fl, struct sock *sk, int flags)
{
	return 0;
}
static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
				struct flowi *fl, struct sock *sk, int flags)
{
	return 0;
}
#else
/* With CONFIG_XFRM the lookups are defined out of line. */
extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
		       struct flowi *fl, struct sock *sk, int flags);
extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
			 struct flowi *fl, struct sock *sk, int flags);
#endif
3691da177e4SLinus Torvalds #endif
3701da177e4SLinus Torvalds 
3711da177e4SLinus Torvalds #endif /* _NET_DST_H */
372