xref: /openbmc/linux/include/net/vxlan.h (revision e7f70af1)
1012a5729SPravin B Shelar #ifndef __NET_VXLAN_H
2012a5729SPravin B Shelar #define __NET_VXLAN_H 1
3012a5729SPravin B Shelar 
45f35227eSJesse Gross #include <linux/ip.h>
55f35227eSJesse Gross #include <linux/ipv6.h>
65f35227eSJesse Gross #include <linux/if_vlan.h>
7012a5729SPravin B Shelar #include <linux/skbuff.h>
8012a5729SPravin B Shelar #include <linux/netdevice.h>
9012a5729SPravin B Shelar #include <linux/udp.h>
10ee122c79SThomas Graf #include <net/dst_metadata.h>
11012a5729SPravin B Shelar 
12828788acSJiri Benc /* VXLAN protocol (RFC 7348) header:
133511494cSThomas Graf  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
14828788acSJiri Benc  * |R|R|R|R|I|R|R|R|               Reserved                        |
153511494cSThomas Graf  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
163511494cSThomas Graf  * |                VXLAN Network Identifier (VNI) |   Reserved    |
173511494cSThomas Graf  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
183511494cSThomas Graf  *
19828788acSJiri Benc  * I = VXLAN Network Identifier (VNI) present.
20828788acSJiri Benc  */
21828788acSJiri Benc struct vxlanhdr {
22828788acSJiri Benc 	__be32 vx_flags;
23828788acSJiri Benc 	__be32 vx_vni;
24828788acSJiri Benc };
25828788acSJiri Benc 
/* VXLAN header flags. */
#define VXLAN_HF_VNI		cpu_to_be32(BIT(27))

/* 24-bit VNI space; VXLAN_VNI_MASK selects it inside the vx_vni word. */
#define VXLAN_N_VID		(1u << 24)
#define VXLAN_VID_MASK		(VXLAN_N_VID - 1)
#define VXLAN_VNI_MASK		cpu_to_be32(VXLAN_VID_MASK << 8)
/* Size of the UDP header plus the VXLAN header. */
#define VXLAN_HLEN		(sizeof(struct udphdr) + sizeof(struct vxlanhdr))

/* Hash table sizing: log2 of the bucket count and the bucket count. */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1 << VNI_HASH_BITS)
#define FDB_HASH_BITS		8
#define FDB_HASH_SIZE		(1 << FDB_HASH_BITS)
38828788acSJiri Benc 
39828788acSJiri Benc /* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X):
40828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
41828788acSJiri Benc  * |R|R|R|R|I|R|R|R|R|R|C|              Reserved                   |
42828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
43828788acSJiri Benc  * |           VXLAN Network Identifier (VNI)      |O| Csum start  |
44828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
45828788acSJiri Benc  *
46828788acSJiri Benc  * C = Remote checksum offload bit. When set indicates that the
47828788acSJiri Benc  *     remote checksum offload data is present.
48828788acSJiri Benc  *
49828788acSJiri Benc  * O = Offset bit. Indicates the checksum offset relative to
50828788acSJiri Benc  *     checksum start.
51828788acSJiri Benc  *
52828788acSJiri Benc  * Csum start = Checksum start divided by two.
53828788acSJiri Benc  *
54828788acSJiri Benc  * http://tools.ietf.org/html/draft-herbert-vxlan-rco
55828788acSJiri Benc  */
56828788acSJiri Benc 
/* VXLAN-RCO header flags. */
#define VXLAN_HF_RCO		cpu_to_be32(BIT(21))

/* Remote checksum offload header option */
#define VXLAN_RCO_MASK		cpu_to_be32(0x7f)  /* Last byte of vni field */
#define VXLAN_RCO_UDP		cpu_to_be32(0x80)  /* Indicate UDP RCO (TCP when not set) */
#define VXLAN_RCO_SHIFT		1		   /* Left shift of start */
#define VXLAN_RCO_SHIFT_MASK	((1 << VXLAN_RCO_SHIFT) - 1)
#define VXLAN_MAX_REMCSUM_START	(0x7f << VXLAN_RCO_SHIFT)
66828788acSJiri Benc 
67828788acSJiri Benc /*
68828788acSJiri Benc  * VXLAN Group Based Policy Extension (VXLAN_F_GBP):
69828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
70828788acSJiri Benc  * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
71828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
72828788acSJiri Benc  * |                VXLAN Network Identifier (VNI) |   Reserved    |
73828788acSJiri Benc  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
74828788acSJiri Benc  *
75828788acSJiri Benc  * G = Group Policy ID present.
76828788acSJiri Benc  *
773511494cSThomas Graf  * D = Don't Learn bit. When set, this bit indicates that the egress
783511494cSThomas Graf  *     VTEP MUST NOT learn the source address of the encapsulated frame.
793511494cSThomas Graf  *
803511494cSThomas Graf  * A = Indicates that the group policy has already been applied to
813511494cSThomas Graf  *     this packet. Policies MUST NOT be applied by devices when the
823511494cSThomas Graf  *     A bit is set.
833511494cSThomas Graf  *
84828788acSJiri Benc  * https://tools.ietf.org/html/draft-smith-vxlan-group-policy
853511494cSThomas Graf  */
863511494cSThomas Graf struct vxlanhdr_gbp {
870e715d6fSJiri Benc 	u8	vx_flags;
883511494cSThomas Graf #ifdef __LITTLE_ENDIAN_BITFIELD
890e715d6fSJiri Benc 	u8	reserved_flags1:3,
903511494cSThomas Graf 		policy_applied:1,
913511494cSThomas Graf 		reserved_flags2:2,
923511494cSThomas Graf 		dont_learn:1,
933511494cSThomas Graf 		reserved_flags3:1;
943511494cSThomas Graf #elif defined(__BIG_ENDIAN_BITFIELD)
950e715d6fSJiri Benc 	u8	reserved_flags1:1,
963511494cSThomas Graf 		dont_learn:1,
973511494cSThomas Graf 		reserved_flags2:2,
983511494cSThomas Graf 		policy_applied:1,
993511494cSThomas Graf 		reserved_flags3:3;
1003511494cSThomas Graf #else
1013511494cSThomas Graf #error	"Please fix <asm/byteorder.h>"
1023511494cSThomas Graf #endif
1033511494cSThomas Graf 	__be16	policy_id;
1043511494cSThomas Graf 	__be32	vx_vni;
1053511494cSThomas Graf };
1063511494cSThomas Graf 
/* VXLAN-GBP header flags. */
#define VXLAN_HF_GBP		cpu_to_be32(BIT(31))

/* The GBP flag combined with the low 24 bits of the flags word. */
#define VXLAN_GBP_USED_BITS	(VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))
1113511494cSThomas Graf 
1123511494cSThomas Graf /* skb->mark mapping
1133511494cSThomas Graf  *
1143511494cSThomas Graf  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1153511494cSThomas Graf  * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
1163511494cSThomas Graf  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1173511494cSThomas Graf  */
#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)	/* D bit */
#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)	/* A bit */
#define VXLAN_GBP_ID_MASK		(0xFFFF)	/* low 16 bits: Group Policy ID */
1213511494cSThomas Graf 
1223511494cSThomas Graf struct vxlan_metadata {
1233511494cSThomas Graf 	u32		gbp;
1243511494cSThomas Graf };
1253511494cSThomas Graf 
126012a5729SPravin B Shelar /* per UDP socket information */
127012a5729SPravin B Shelar struct vxlan_sock {
128012a5729SPravin B Shelar 	struct hlist_node hlist;
129012a5729SPravin B Shelar 	struct work_struct del_work;
130012a5729SPravin B Shelar 	struct socket	 *sock;
131012a5729SPravin B Shelar 	struct rcu_head	  rcu;
132012a5729SPravin B Shelar 	struct hlist_head vni_list[VNI_HASH_SIZE];
133012a5729SPravin B Shelar 	atomic_t	  refcnt;
134dc01e7d3SOr Gerlitz 	struct udp_offload udp_offloads;
135dfd8645eSTom Herbert 	u32		  flags;
136012a5729SPravin B Shelar };
137012a5729SPravin B Shelar 
1380dfbdf41SThomas Graf union vxlan_addr {
1390dfbdf41SThomas Graf 	struct sockaddr_in sin;
1400dfbdf41SThomas Graf 	struct sockaddr_in6 sin6;
1410dfbdf41SThomas Graf 	struct sockaddr sa;
1420dfbdf41SThomas Graf };
1430dfbdf41SThomas Graf 
1440dfbdf41SThomas Graf struct vxlan_rdst {
1450dfbdf41SThomas Graf 	union vxlan_addr	 remote_ip;
1460dfbdf41SThomas Graf 	__be16			 remote_port;
14754bfd872SJiri Benc 	__be32			 remote_vni;
1480dfbdf41SThomas Graf 	u32			 remote_ifindex;
1490dfbdf41SThomas Graf 	struct list_head	 list;
1500dfbdf41SThomas Graf 	struct rcu_head		 rcu;
1510c1d70afSPaolo Abeni 	struct dst_cache	 dst_cache;
1520dfbdf41SThomas Graf };
1530dfbdf41SThomas Graf 
1540dfbdf41SThomas Graf struct vxlan_config {
1550dfbdf41SThomas Graf 	union vxlan_addr	remote_ip;
1560dfbdf41SThomas Graf 	union vxlan_addr	saddr;
15754bfd872SJiri Benc 	__be32			vni;
1580dfbdf41SThomas Graf 	int			remote_ifindex;
1590dfbdf41SThomas Graf 	int			mtu;
1600dfbdf41SThomas Graf 	__be16			dst_port;
1610e715d6fSJiri Benc 	u16			port_min;
1620e715d6fSJiri Benc 	u16			port_max;
1630e715d6fSJiri Benc 	u8			tos;
1640e715d6fSJiri Benc 	u8			ttl;
165e7f70af1SDaniel Borkmann 	__be32			label;
1660dfbdf41SThomas Graf 	u32			flags;
1670dfbdf41SThomas Graf 	unsigned long		age_interval;
1680dfbdf41SThomas Graf 	unsigned int		addrmax;
1690dfbdf41SThomas Graf 	bool			no_share;
1700dfbdf41SThomas Graf };
1710dfbdf41SThomas Graf 
1720dfbdf41SThomas Graf /* Pseudo network device */
1730dfbdf41SThomas Graf struct vxlan_dev {
1740dfbdf41SThomas Graf 	struct hlist_node hlist;	/* vni hash table */
1750dfbdf41SThomas Graf 	struct list_head  next;		/* vxlan's per namespace list */
176b1be00a6SJiri Benc 	struct vxlan_sock *vn4_sock;	/* listening socket for IPv4 */
177b1be00a6SJiri Benc #if IS_ENABLED(CONFIG_IPV6)
178b1be00a6SJiri Benc 	struct vxlan_sock *vn6_sock;	/* listening socket for IPv6 */
179b1be00a6SJiri Benc #endif
1800dfbdf41SThomas Graf 	struct net_device *dev;
1810dfbdf41SThomas Graf 	struct net	  *net;		/* netns for packet i/o */
1820dfbdf41SThomas Graf 	struct vxlan_rdst default_dst;	/* default destination */
1830dfbdf41SThomas Graf 	u32		  flags;	/* VXLAN_F_* in vxlan.h */
1840dfbdf41SThomas Graf 
1850dfbdf41SThomas Graf 	struct timer_list age_timer;
1860dfbdf41SThomas Graf 	spinlock_t	  hash_lock;
1870dfbdf41SThomas Graf 	unsigned int	  addrcnt;
18858ce31ccSTom Herbert 	struct gro_cells  gro_cells;
1890dfbdf41SThomas Graf 
1900dfbdf41SThomas Graf 	struct vxlan_config	cfg;
1910dfbdf41SThomas Graf 
1920dfbdf41SThomas Graf 	struct hlist_head fdb_head[FDB_HASH_SIZE];
1930dfbdf41SThomas Graf };
1940dfbdf41SThomas Graf 
/* VXLAN_F_* flag bits, one bit per flag. */
#define VXLAN_F_LEARN			(1 << 0)
#define VXLAN_F_PROXY			(1 << 1)
#define VXLAN_F_RSC			(1 << 2)
#define VXLAN_F_L2MISS			(1 << 3)
#define VXLAN_F_L3MISS			(1 << 4)
#define VXLAN_F_IPV6			(1 << 5)
#define VXLAN_F_UDP_ZERO_CSUM_TX	(1 << 6)
#define VXLAN_F_UDP_ZERO_CSUM6_TX	(1 << 7)
#define VXLAN_F_UDP_ZERO_CSUM6_RX	(1 << 8)
#define VXLAN_F_REMCSUM_TX		(1 << 9)
#define VXLAN_F_REMCSUM_RX		(1 << 10)
#define VXLAN_F_GBP			(1 << 11)
#define VXLAN_F_REMCSUM_NOPARTIAL	(1 << 12)
#define VXLAN_F_COLLECT_METADATA	(1 << 13)

/* Flags that are used in the receive path. These flags must match in
 * order for a socket to be shareable
 */
#define VXLAN_F_RCV_FLAGS	(VXLAN_F_GBP |			\
				 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
				 VXLAN_F_REMCSUM_RX |		\
				 VXLAN_F_REMCSUM_NOPARTIAL |	\
				 VXLAN_F_COLLECT_METADATA)
218ac5132d1SThomas Graf 
2190dfbdf41SThomas Graf struct net_device *vxlan_dev_create(struct net *net, const char *name,
2200dfbdf41SThomas Graf 				    u8 name_assign_type, struct vxlan_config *conf);
2210dfbdf41SThomas Graf 
222b1be00a6SJiri Benc static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
223b1be00a6SJiri Benc 					unsigned short family)
224614732eaSThomas Graf {
225b1be00a6SJiri Benc #if IS_ENABLED(CONFIG_IPV6)
226b1be00a6SJiri Benc 	if (family == AF_INET6)
227b1be00a6SJiri Benc 		return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
228b1be00a6SJiri Benc #endif
229b1be00a6SJiri Benc 	return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
230614732eaSThomas Graf }
23149560532SPravin B Shelar 
2325f35227eSJesse Gross static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
2335f35227eSJesse Gross 						     netdev_features_t features)
23411bf7828SJoe Stringer {
2355f35227eSJesse Gross 	u8 l4_hdr = 0;
2365f35227eSJesse Gross 
2375f35227eSJesse Gross 	if (!skb->encapsulation)
2385f35227eSJesse Gross 		return features;
2395f35227eSJesse Gross 
2405f35227eSJesse Gross 	switch (vlan_get_protocol(skb)) {
2415f35227eSJesse Gross 	case htons(ETH_P_IP):
2425f35227eSJesse Gross 		l4_hdr = ip_hdr(skb)->protocol;
2435f35227eSJesse Gross 		break;
2445f35227eSJesse Gross 	case htons(ETH_P_IPV6):
2455f35227eSJesse Gross 		l4_hdr = ipv6_hdr(skb)->nexthdr;
2465f35227eSJesse Gross 		break;
2475f35227eSJesse Gross 	default:
2485f35227eSJesse Gross 		return features;;
2495f35227eSJesse Gross 	}
2505f35227eSJesse Gross 
2515f35227eSJesse Gross 	if ((l4_hdr == IPPROTO_UDP) &&
25211bf7828SJoe Stringer 	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
25311bf7828SJoe Stringer 	     skb->inner_protocol != htons(ETH_P_TEB) ||
25411bf7828SJoe Stringer 	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
25511bf7828SJoe Stringer 	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
256a188222bSTom Herbert 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
25711bf7828SJoe Stringer 
2585f35227eSJesse Gross 	return features;
25911bf7828SJoe Stringer }
26023e62de3SJoe Stringer 
/* Headroom in bytes: IP header + UDP + VXLAN + Ethernet header */
#define VXLAN_HEADROOM	(20 + 8 + 8 + 14)
/* Headroom in bytes: IPv6 header + UDP + VXLAN + Ethernet header */
#define VXLAN6_HEADROOM	(40 + 8 + 8 + 14)
265e6cd988cSJoseph Gasparakis 
266d4ac05ffSJiri Benc static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
267d4ac05ffSJiri Benc {
268d4ac05ffSJiri Benc 	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
269d4ac05ffSJiri Benc }
270d4ac05ffSJiri Benc 
27154bfd872SJiri Benc static inline __be32 vxlan_vni(__be32 vni_field)
27254bfd872SJiri Benc {
27354bfd872SJiri Benc #if defined(__BIG_ENDIAN)
27454bfd872SJiri Benc 	return vni_field >> 8;
27554bfd872SJiri Benc #else
27654bfd872SJiri Benc 	return (vni_field & VXLAN_VNI_MASK) << 8;
27754bfd872SJiri Benc #endif
27854bfd872SJiri Benc }
27954bfd872SJiri Benc 
28054bfd872SJiri Benc static inline __be32 vxlan_vni_field(__be32 vni)
28154bfd872SJiri Benc {
28254bfd872SJiri Benc #if defined(__BIG_ENDIAN)
28354bfd872SJiri Benc 	return vni << 8;
28454bfd872SJiri Benc #else
28554bfd872SJiri Benc 	return vni >> 8;
28654bfd872SJiri Benc #endif
28754bfd872SJiri Benc }
28854bfd872SJiri Benc 
28954bfd872SJiri Benc static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
29054bfd872SJiri Benc {
29154bfd872SJiri Benc #if defined(__BIG_ENDIAN)
29254bfd872SJiri Benc 	return tun_id;
29354bfd872SJiri Benc #else
29454bfd872SJiri Benc 	return tun_id >> 32;
29554bfd872SJiri Benc #endif
29654bfd872SJiri Benc }
29754bfd872SJiri Benc 
29807dabf20SJiri Benc static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
29907dabf20SJiri Benc {
30007dabf20SJiri Benc #if defined(__BIG_ENDIAN)
30107dabf20SJiri Benc 	return (__be64)vni;
30207dabf20SJiri Benc #else
30307dabf20SJiri Benc 	return (__be64)vni << 32;
30407dabf20SJiri Benc #endif
30507dabf20SJiri Benc }
30607dabf20SJiri Benc 
30754bfd872SJiri Benc static inline size_t vxlan_rco_start(__be32 vni_field)
30854bfd872SJiri Benc {
30954bfd872SJiri Benc 	return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
31054bfd872SJiri Benc }
31154bfd872SJiri Benc 
31254bfd872SJiri Benc static inline size_t vxlan_rco_offset(__be32 vni_field)
31354bfd872SJiri Benc {
31454bfd872SJiri Benc 	return (vni_field & VXLAN_RCO_UDP) ?
31554bfd872SJiri Benc 		offsetof(struct udphdr, check) :
31654bfd872SJiri Benc 		offsetof(struct tcphdr, check);
31754bfd872SJiri Benc }
31854bfd872SJiri Benc 
31954bfd872SJiri Benc static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
32054bfd872SJiri Benc {
32154bfd872SJiri Benc 	__be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT);
32254bfd872SJiri Benc 
32354bfd872SJiri Benc 	if (offset == offsetof(struct udphdr, check))
32454bfd872SJiri Benc 		vni_field |= VXLAN_RCO_UDP;
32554bfd872SJiri Benc 	return vni_field;
32654bfd872SJiri Benc }
32754bfd872SJiri Benc 
328e6cd988cSJoseph Gasparakis #if IS_ENABLED(CONFIG_VXLAN)
32953cf5275SJoseph Gasparakis void vxlan_get_rx_port(struct net_device *netdev);
330e6cd988cSJoseph Gasparakis #else
331e6cd988cSJoseph Gasparakis static inline void vxlan_get_rx_port(struct net_device *netdev)
332e6cd988cSJoseph Gasparakis {
333e6cd988cSJoseph Gasparakis }
334e6cd988cSJoseph Gasparakis #endif
335705cc62fSJiri Benc 
336705cc62fSJiri Benc static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
337705cc62fSJiri Benc {
338705cc62fSJiri Benc 	return vs->sock->sk->sk_family;
339705cc62fSJiri Benc }
34048e92c44SJiri Benc 
34148e92c44SJiri Benc #endif
342