xref: /openbmc/linux/include/net/inet_hashtables.h (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
12874c5fdSThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-or-later */
2304a1618SArnaldo Carvalho de Melo /*
3304a1618SArnaldo Carvalho de Melo  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4304a1618SArnaldo Carvalho de Melo  *		operating system.  INET is implemented using the BSD Socket
5304a1618SArnaldo Carvalho de Melo  *		interface as the means of communication with the user level.
6304a1618SArnaldo Carvalho de Melo  *
7304a1618SArnaldo Carvalho de Melo  * Authors:	Lotsa people, from code originally in tcp
8304a1618SArnaldo Carvalho de Melo  */
9304a1618SArnaldo Carvalho de Melo 
10304a1618SArnaldo Carvalho de Melo #ifndef _INET_HASHTABLES_H
11304a1618SArnaldo Carvalho de Melo #define _INET_HASHTABLES_H
12304a1618SArnaldo Carvalho de Melo 
138feaf0c0SArnaldo Carvalho de Melo 
142d8c4ce5SArnaldo Carvalho de Melo #include <linux/interrupt.h>
159a1f27c4SArnaldo Carvalho de Melo #include <linux/ip.h>
1633b62231SArnaldo Carvalho de Melo #include <linux/ipv6.h>
1777d8bf9cSArnaldo Carvalho de Melo #include <linux/list.h>
1877d8bf9cSArnaldo Carvalho de Melo #include <linux/slab.h>
1933b62231SArnaldo Carvalho de Melo #include <linux/socket.h>
2077d8bf9cSArnaldo Carvalho de Melo #include <linux/spinlock.h>
21304a1618SArnaldo Carvalho de Melo #include <linux/types.h>
22f3f05f70SArnaldo Carvalho de Melo #include <linux/wait.h>
23304a1618SArnaldo Carvalho de Melo 
24463c84b9SArnaldo Carvalho de Melo #include <net/inet_connection_sock.h>
2514c85021SArnaldo Carvalho de Melo #include <net/inet_sock.h>
2628044fc1SJoanne Koong #include <net/ip.h>
272d8c4ce5SArnaldo Carvalho de Melo #include <net/sock.h>
289a1f27c4SArnaldo Carvalho de Melo #include <net/route.h>
29c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
300b441916SPavel Emelyanov #include <net/netns/hash.h>
312d8c4ce5SArnaldo Carvalho de Melo 
3241c6d650SReshetova, Elena #include <linux/refcount.h>
33e48c414eSArnaldo Carvalho de Melo #include <asm/byteorder.h>
34f3f05f70SArnaldo Carvalho de Melo 
/* This is for all connections with a full identity, no wildcards.
 * The 'e' prefix stands for Established, but we really put all sockets
 * except LISTEN ones here.
 */
3977d8bf9cSArnaldo Carvalho de Melo struct inet_ehash_bucket {
403ab5aee7SEric Dumazet 	struct hlist_nulls_head chain;
416d255361SEric Dumazet };
4277d8bf9cSArnaldo Carvalho de Melo 
4377d8bf9cSArnaldo Carvalho de Melo /* There are a few simple rules, which allow for local port reuse by
4477d8bf9cSArnaldo Carvalho de Melo  * an application.  In essence:
4577d8bf9cSArnaldo Carvalho de Melo  *
4677d8bf9cSArnaldo Carvalho de Melo  *	1) Sockets bound to different interfaces may share a local port.
4777d8bf9cSArnaldo Carvalho de Melo  *	   Failing that, goto test 2.
4877d8bf9cSArnaldo Carvalho de Melo  *	2) If all sockets have sk->sk_reuse set, and none of them are in
4977d8bf9cSArnaldo Carvalho de Melo  *	   TCP_LISTEN state, the port may be shared.
5077d8bf9cSArnaldo Carvalho de Melo  *	   Failing that, goto test 3.
5177d8bf9cSArnaldo Carvalho de Melo  *	3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
5277d8bf9cSArnaldo Carvalho de Melo  *	   address, and none of them are the same, the port may be
5377d8bf9cSArnaldo Carvalho de Melo  *	   shared.
5477d8bf9cSArnaldo Carvalho de Melo  *	   Failing this, the port cannot be shared.
5577d8bf9cSArnaldo Carvalho de Melo  *
5677d8bf9cSArnaldo Carvalho de Melo  * The interesting point, is test #2.  This is what an FTP server does
5777d8bf9cSArnaldo Carvalho de Melo  * all day.  To optimize this case we use a specific flag bit defined
5877d8bf9cSArnaldo Carvalho de Melo  * below.  As we add sockets to a bind bucket list, we perform a
5977d8bf9cSArnaldo Carvalho de Melo  * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
6077d8bf9cSArnaldo Carvalho de Melo  * As long as all sockets added to a bind bucket pass this test,
6177d8bf9cSArnaldo Carvalho de Melo  * the flag bit will be set.
6277d8bf9cSArnaldo Carvalho de Melo  * The resulting situation is that tcp_v[46]_verify_bind() can just check
6377d8bf9cSArnaldo Carvalho de Melo  * for this flag bit, if it is set and the socket trying to bind has
6477d8bf9cSArnaldo Carvalho de Melo  * sk->sk_reuse set, we don't even have to walk the owners list at all,
6577d8bf9cSArnaldo Carvalho de Melo  * we return that it is ok to bind this socket to the requested local port.
6677d8bf9cSArnaldo Carvalho de Melo  *
6777d8bf9cSArnaldo Carvalho de Melo  * Sounds like a lot of work, but it is worth it.  In a more naive
6877d8bf9cSArnaldo Carvalho de Melo  * implementation (ie. current FreeBSD etc.) the entire list of ports
6977d8bf9cSArnaldo Carvalho de Melo  * must be walked for each data port opened by an ftp server.  Needless
7077d8bf9cSArnaldo Carvalho de Melo  * to say, this does not scale at all.  With a couple thousand FTP
7177d8bf9cSArnaldo Carvalho de Melo  * users logged onto your box, isn't it nice to know that new data
7277d8bf9cSArnaldo Carvalho de Melo  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
7377d8bf9cSArnaldo Carvalho de Melo  */
/* NOTE(review): presumably the values kept in
 * inet_bind_bucket::fastreuseport - confirm against the bind code.
 */
#define FASTREUSEPORT_ANY	1
#define FASTREUSEPORT_STRICT	2
76637bc8bbSJosef Bacik 
7777d8bf9cSArnaldo Carvalho de Melo struct inet_bind_bucket {
780c5c9fb5SEric W. Biederman 	possible_net_t		ib_net;
793c82a21fSRobert Shearman 	int			l3mdev;
8077d8bf9cSArnaldo Carvalho de Melo 	unsigned short		port;
81da5e3630STom Herbert 	signed char		fastreuse;
82da5e3630STom Herbert 	signed char		fastreuseport;
83da5e3630STom Herbert 	kuid_t			fastuid;
84637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6)
85637bc8bbSJosef Bacik 	struct in6_addr		fast_v6_rcv_saddr;
86637bc8bbSJosef Bacik #endif
87637bc8bbSJosef Bacik 	__be32			fast_rcv_saddr;
88637bc8bbSJosef Bacik 	unsigned short		fast_sk_family;
89637bc8bbSJosef Bacik 	bool			fast_ipv6_only;
9077d8bf9cSArnaldo Carvalho de Melo 	struct hlist_node	node;
9177d8bf9cSArnaldo Carvalho de Melo 	struct hlist_head	owners;
9277d8bf9cSArnaldo Carvalho de Melo };
9377d8bf9cSArnaldo Carvalho de Melo 
9428044fc1SJoanne Koong struct inet_bind2_bucket {
9528044fc1SJoanne Koong 	possible_net_t		ib_net;
9628044fc1SJoanne Koong 	int			l3mdev;
9728044fc1SJoanne Koong 	unsigned short		port;
985456262dSMartin KaFai Lau #if IS_ENABLED(CONFIG_IPV6)
995456262dSMartin KaFai Lau 	unsigned short		family;
1005456262dSMartin KaFai Lau #endif
10128044fc1SJoanne Koong 	union {
10228044fc1SJoanne Koong #if IS_ENABLED(CONFIG_IPV6)
10328044fc1SJoanne Koong 		struct in6_addr		v6_rcv_saddr;
10428044fc1SJoanne Koong #endif
10528044fc1SJoanne Koong 		__be32			rcv_saddr;
10628044fc1SJoanne Koong 	};
10728044fc1SJoanne Koong 	/* Node in the bhash2 inet_bind_hashbucket chain */
10828044fc1SJoanne Koong 	struct hlist_node	node;
10928044fc1SJoanne Koong 	/* List of sockets hashed to this bucket */
11028044fc1SJoanne Koong 	struct hlist_head	owners;
111936a192fSKuniyuki Iwashima 	/* bhash has twsk in owners, but bhash2 has twsk in
112936a192fSKuniyuki Iwashima 	 * deathrow not to add a member in struct sock_common.
113936a192fSKuniyuki Iwashima 	 */
114936a192fSKuniyuki Iwashima 	struct hlist_head	deathrow;
11528044fc1SJoanne Koong };
11628044fc1SJoanne Koong 
ib_net(const struct inet_bind_bucket * ib)11728044fc1SJoanne Koong static inline struct net *ib_net(const struct inet_bind_bucket *ib)
11828044fc1SJoanne Koong {
11928044fc1SJoanne Koong 	return read_pnet(&ib->ib_net);
12028044fc1SJoanne Koong }
12128044fc1SJoanne Koong 
ib2_net(const struct inet_bind2_bucket * ib)12228044fc1SJoanne Koong static inline struct net *ib2_net(const struct inet_bind2_bucket *ib)
1237a9546eeSEric Dumazet {
1247a9546eeSEric Dumazet 	return read_pnet(&ib->ib_net);
1257a9546eeSEric Dumazet }
1267a9546eeSEric Dumazet 
/* Walk every bucket @tb chained on @head via its 'node' member. */
#define inet_bind_bucket_for_each(tb, head)	\
	hlist_for_each_entry(tb, head, node)
12977d8bf9cSArnaldo Carvalho de Melo 
13077d8bf9cSArnaldo Carvalho de Melo struct inet_bind_hashbucket {
13177d8bf9cSArnaldo Carvalho de Melo 	spinlock_t		lock;
13277d8bf9cSArnaldo Carvalho de Melo 	struct hlist_head	chain;
13377d8bf9cSArnaldo Carvalho de Melo };
13477d8bf9cSArnaldo Carvalho de Melo 
1358dbd76e7SEric Dumazet /* Sockets can be hashed in established or listening table.
1368dbd76e7SEric Dumazet  * We must use different 'nulls' end-of-chain value for all hash buckets :
1378dbd76e7SEric Dumazet  * A socket might transition from ESTABLISH to LISTEN state without
1388dbd76e7SEric Dumazet  * RCU grace period. A lookup in ehash table needs to handle this case.
139c25eb3bfSEric Dumazet  */
1408dbd76e7SEric Dumazet #define LISTENING_NULLS_BASE (1U << 29)
1415caea4eaSEric Dumazet struct inet_listen_hashbucket {
1425caea4eaSEric Dumazet 	spinlock_t		lock;
1438dbd76e7SEric Dumazet 	struct hlist_nulls_head	nulls_head;
1448dbd76e7SEric Dumazet };
1455caea4eaSEric Dumazet 
14677d8bf9cSArnaldo Carvalho de Melo /* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
14877d8bf9cSArnaldo Carvalho de Melo 
14977d8bf9cSArnaldo Carvalho de Melo struct inet_hashinfo {
15077d8bf9cSArnaldo Carvalho de Melo 	/* This is for sockets with full identity only.  Sockets here will
15177d8bf9cSArnaldo Carvalho de Melo 	 * always be without wildcards and will have the following invariant:
15277d8bf9cSArnaldo Carvalho de Melo 	 *
15377d8bf9cSArnaldo Carvalho de Melo 	 *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
15477d8bf9cSArnaldo Carvalho de Melo 	 *
15577d8bf9cSArnaldo Carvalho de Melo 	 */
15677d8bf9cSArnaldo Carvalho de Melo 	struct inet_ehash_bucket	*ehash;
1579db66bdcSEric Dumazet 	spinlock_t			*ehash_locks;
158f373b53bSEric Dumazet 	unsigned int			ehash_mask;
159230140cfSEric Dumazet 	unsigned int			ehash_locks_mask;
16077d8bf9cSArnaldo Carvalho de Melo 
16177d8bf9cSArnaldo Carvalho de Melo 	/* Ok, let's try this, I give up, we do need a local binding
16277d8bf9cSArnaldo Carvalho de Melo 	 * TCP hash as well as the others for fast bind/connect.
16377d8bf9cSArnaldo Carvalho de Melo 	 */
1645caea4eaSEric Dumazet 	struct kmem_cache		*bind_bucket_cachep;
16528044fc1SJoanne Koong 	/* This bind table is hashed by local port */
16661b7c691SMartin KaFai Lau 	struct inet_bind_hashbucket	*bhash;
16728044fc1SJoanne Koong 	struct kmem_cache		*bind2_bucket_cachep;
16828044fc1SJoanne Koong 	/* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4)
16928044fc1SJoanne Koong 	 * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used
17028044fc1SJoanne Koong 	 * primarily for expediting bind conflict resolution.
17128044fc1SJoanne Koong 	 */
17228044fc1SJoanne Koong 	struct inet_bind_hashbucket	*bhash2;
17361b7c691SMartin KaFai Lau 	unsigned int			bhash_size;
17461b7c691SMartin KaFai Lau 
17561b7c691SMartin KaFai Lau 	/* The 2nd listener table hashed by local port and address */
17661b7c691SMartin KaFai Lau 	unsigned int			lhash2_mask;
17761b7c691SMartin KaFai Lau 	struct inet_listen_hashbucket	*lhash2;
178d1e5e640SKuniyuki Iwashima 
179d1e5e640SKuniyuki Iwashima 	bool				pernet;
1806f5ca184SEric Dumazet } ____cacheline_aligned_in_smp;
18177d8bf9cSArnaldo Carvalho de Melo 
tcp_or_dccp_get_hashinfo(const struct sock * sk)182429e42c1SKuniyuki Iwashima static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
183429e42c1SKuniyuki Iwashima {
184429e42c1SKuniyuki Iwashima #if IS_ENABLED(CONFIG_IP_DCCP)
185429e42c1SKuniyuki Iwashima 	return sk->sk_prot->h.hashinfo ? :
186429e42c1SKuniyuki Iwashima 		sock_net(sk)->ipv4.tcp_death_row.hashinfo;
187429e42c1SKuniyuki Iwashima #else
188429e42c1SKuniyuki Iwashima 	return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
189429e42c1SKuniyuki Iwashima #endif
190429e42c1SKuniyuki Iwashima }
191429e42c1SKuniyuki Iwashima 
19261b7c691SMartin KaFai Lau static inline struct inet_listen_hashbucket *
inet_lhash2_bucket(struct inet_hashinfo * h,u32 hash)19361b7c691SMartin KaFai Lau inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
19461b7c691SMartin KaFai Lau {
19561b7c691SMartin KaFai Lau 	return &h->lhash2[hash & h->lhash2_mask];
19661b7c691SMartin KaFai Lau }
19761b7c691SMartin KaFai Lau 
inet_ehash_bucket(struct inet_hashinfo * hashinfo,unsigned int hash)19881c3d547SEric Dumazet static inline struct inet_ehash_bucket *inet_ehash_bucket(
19981c3d547SEric Dumazet 	struct inet_hashinfo *hashinfo,
20081c3d547SEric Dumazet 	unsigned int hash)
20181c3d547SEric Dumazet {
202f373b53bSEric Dumazet 	return &hashinfo->ehash[hash & hashinfo->ehash_mask];
203304a1618SArnaldo Carvalho de Melo }
204304a1618SArnaldo Carvalho de Melo 
inet_ehash_lockp(struct inet_hashinfo * hashinfo,unsigned int hash)2059db66bdcSEric Dumazet static inline spinlock_t *inet_ehash_lockp(
206230140cfSEric Dumazet 	struct inet_hashinfo *hashinfo,
207230140cfSEric Dumazet 	unsigned int hash)
208230140cfSEric Dumazet {
209230140cfSEric Dumazet 	return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
210230140cfSEric Dumazet }
211230140cfSEric Dumazet 
/* Counterpart of inet_ehash_locks_free(); defined out of line. */
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
213230140cfSEric Dumazet 
inet_hashinfo2_free_mod(struct inet_hashinfo * h)214c96b6accSWang Hai static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
215c96b6accSWang Hai {
216c96b6accSWang Hai 	kfree(h->lhash2);
217c96b6accSWang Hai 	h->lhash2 = NULL;
218c96b6accSWang Hai }
219c96b6accSWang Hai 
inet_ehash_locks_free(struct inet_hashinfo * hashinfo)220230140cfSEric Dumazet static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
221230140cfSEric Dumazet {
222095dc8e0SEric Dumazet 	kvfree(hashinfo->ehash_locks);
223230140cfSEric Dumazet 	hashinfo->ehash_locks = NULL;
224230140cfSEric Dumazet }
225230140cfSEric Dumazet 
/* Per-netns hashinfo allocation and release; defined out of line. */
struct inet_hashinfo *
inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
			   unsigned int ehash_entries);
void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo);
229d1e5e640SKuniyuki Iwashima 
2301fd51155SJoe Perches struct inet_bind_bucket *
2311fd51155SJoe Perches inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
23277d8bf9cSArnaldo Carvalho de Melo 			struct inet_bind_hashbucket *head,
2333c82a21fSRobert Shearman 			const unsigned short snum, int l3mdev);
2341fd51155SJoe Perches void inet_bind_bucket_destroy(struct kmem_cache *cachep,
23577d8bf9cSArnaldo Carvalho de Melo 			      struct inet_bind_bucket *tb);
23677d8bf9cSArnaldo Carvalho de Melo 
23728044fc1SJoanne Koong bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
23828044fc1SJoanne Koong 			    const struct net *net, unsigned short port,
23928044fc1SJoanne Koong 			    int l3mdev);
24028044fc1SJoanne Koong 
24128044fc1SJoanne Koong struct inet_bind2_bucket *
24228044fc1SJoanne Koong inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
24328044fc1SJoanne Koong 			 struct inet_bind_hashbucket *head,
24428044fc1SJoanne Koong 			 unsigned short port, int l3mdev,
24528044fc1SJoanne Koong 			 const struct sock *sk);
24628044fc1SJoanne Koong 
24728044fc1SJoanne Koong void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
24828044fc1SJoanne Koong 			       struct inet_bind2_bucket *tb);
24928044fc1SJoanne Koong 
25028044fc1SJoanne Koong struct inet_bind2_bucket *
25128044fc1SJoanne Koong inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
25228044fc1SJoanne Koong 		       const struct net *net,
25328044fc1SJoanne Koong 		       unsigned short port, int l3mdev,
25428044fc1SJoanne Koong 		       const struct sock *sk);
25528044fc1SJoanne Koong 
25628044fc1SJoanne Koong bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
25728044fc1SJoanne Koong 				      const struct net *net, unsigned short port,
25828044fc1SJoanne Koong 				      int l3mdev, const struct sock *sk);
25928044fc1SJoanne Koong 
/* Hash local port @lport, mixed with the per-netns value from
 * net_hash_mix(), into a bhash index.
 *
 * The result is masked with (@bhash_size - 1), so this assumes
 * @bhash_size is a power of two - TODO confirm at the allocation site.
 */
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
			       const u32 bhash_size)
{
	const u32 mask = bhash_size - 1;

	return (lport + net_hash_mix(net)) & mask;
}
26577d8bf9cSArnaldo Carvalho de Melo 
26628044fc1SJoanne Koong static inline struct inet_bind_hashbucket *
inet_bhashfn_portaddr(const struct inet_hashinfo * hinfo,const struct sock * sk,const struct net * net,unsigned short port)26728044fc1SJoanne Koong inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
26828044fc1SJoanne Koong 		      const struct net *net, unsigned short port)
26928044fc1SJoanne Koong {
27028044fc1SJoanne Koong 	u32 hash;
27128044fc1SJoanne Koong 
27228044fc1SJoanne Koong #if IS_ENABLED(CONFIG_IPV6)
27328044fc1SJoanne Koong 	if (sk->sk_family == AF_INET6)
27428044fc1SJoanne Koong 		hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
27528044fc1SJoanne Koong 	else
27628044fc1SJoanne Koong #endif
27728044fc1SJoanne Koong 		hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
27828044fc1SJoanne Koong 	return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
27928044fc1SJoanne Koong }
28028044fc1SJoanne Koong 
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk,
				const struct net *net, int port);
28328044fc1SJoanne Koong 
/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or
 * sk_v6_rcv_saddr (ipv6) changes after it has been bound. The socket's
 * rcv_saddr field should already have been updated when this is called.
 */
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family);
void inet_bhash2_reset_saddr(struct sock *sk);

void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    struct inet_bind2_bucket *tb2, unsigned short port);
2932d8c4ce5SArnaldo Carvalho de Melo 
2942d8c4ce5SArnaldo Carvalho de Melo /* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);

void inet_put_port(struct sock *sk);
2982d8c4ce5SArnaldo Carvalho de Melo 
/* lhash2 setup routines; defined out of line.
 * inet_hashinfo2_free_mod() releases the lhash2 table with kfree().
 */
void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
			 unsigned long numentries, int scale,
			 unsigned long low_limit,
			 unsigned long high_limit);
int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
304f3f05f70SArnaldo Carvalho de Melo 
30501770a16SRicardo Dias bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
30601770a16SRicardo Dias bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
30701770a16SRicardo Dias 			 bool *found_dup_sk);
308fe38d2a1SJosef Bacik int __inet_hash(struct sock *sk, struct sock *osk);
309086c653fSCraig Gallek int inet_hash(struct sock *sk);
3101fd51155SJoe Perches void inet_unhash(struct sock *sk);
31133b62231SArnaldo Carvalho de Melo 
3121fd51155SJoe Perches struct sock *__inet_lookup_listener(struct net *net,
313c67499c0SPavel Emelyanov 				    struct inet_hashinfo *hashinfo,
314a583636aSCraig Gallek 				    struct sk_buff *skb, int doff,
3151fd51155SJoe Perches 				    const __be32 saddr, const __be16 sport,
316fb99c848SAl Viro 				    const __be32 daddr,
3178f491069SHerbert Xu 				    const unsigned short hnum,
3183fa6f616SDavid Ahern 				    const int dif, const int sdif);
3198f491069SHerbert Xu 
/* Wrapper around __inet_lookup_listener() that takes the destination port
 * in network byte order and converts it with ntohs() before the lookup.
 */
static inline struct sock *
inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo,
		     struct sk_buff *skb, int doff,
		     __be32 saddr, __be16 sport,
		     __be32 daddr, __be16 dport, int dif, int sdif)
{
	const unsigned short hnum = ntohs(dport);

	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
				      daddr, hnum, dif, sdif);
}
3298feaf0c0SArnaldo Carvalho de Melo 
3308feaf0c0SArnaldo Carvalho de Melo /* Socket demux engine toys. */
3314f765d84SAl Viro /* What happens here is ugly; there's a pair of adjacent fields in
3324f765d84SAl Viro    struct inet_sock; __be16 dport followed by __u16 num.  We want to
3334f765d84SAl Viro    search by pair, so we combine the keys into a single 32bit value
3344f765d84SAl Viro    and compare with 32bit value read from &...->dport.  Let's at least
3354f765d84SAl Viro    make sure that it's not mixed with anything else...
3364f765d84SAl Viro    On 64bit targets we combine comparisons with pair of adjacent __be32
3374f765d84SAl Viro    fields in the same way.
3384f765d84SAl Viro */
/* INET_COMBINED_PORTS() packs a port pair into one __portpair value and
 * INET_ADDR_COOKIE() declares a const __addrpair holding an address pair.
 * Which operand lands in the upper half depends on the byte order.
 */
#ifdef __BIG_ENDIAN

#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))

#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__saddr)) << 32) | \
				   ((__force __u64)(__be32)(__daddr)))

#else /* __LITTLE_ENDIAN */

#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))

#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__daddr)) << 32) | \
				   ((__force __u64)(__be32)(__saddr)))

#endif /* __BIG_ENDIAN */
358c7228317SJoe Perches 
inet_match(struct net * net,const struct sock * sk,const __addrpair cookie,const __portpair ports,int dif,int sdif)359eda090c3SEric Dumazet static inline bool inet_match(struct net *net, const struct sock *sk,
3604915d50eSEric Dumazet 			      const __addrpair cookie, const __portpair ports,
3614915d50eSEric Dumazet 			      int dif, int sdif)
3624915d50eSEric Dumazet {
3634915d50eSEric Dumazet 	if (!net_eq(sock_net(sk), net) ||
3644915d50eSEric Dumazet 	    sk->sk_portpair != ports ||
3654915d50eSEric Dumazet 	    sk->sk_addrpair != cookie)
3664915d50eSEric Dumazet 	        return false;
3674915d50eSEric Dumazet 
368944fd1aeSMike Manning 	/* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
369944fd1aeSMike Manning 	return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
370944fd1aeSMike Manning 				    sdif);
3714915d50eSEric Dumazet }
372e48c414eSArnaldo Carvalho de Melo 
373ee3cf32aSEric Dumazet /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
374e48c414eSArnaldo Carvalho de Melo  * not check it for lookups anymore, thanks Alexey. -DaveM
375e48c414eSArnaldo Carvalho de Melo  */
3761fd51155SJoe Perches struct sock *__inet_lookup_established(struct net *net,
377c67499c0SPavel Emelyanov 				       struct inet_hashinfo *hashinfo,
378fb99c848SAl Viro 				       const __be32 saddr, const __be16 sport,
3791fd51155SJoe Perches 				       const __be32 daddr, const u16 hnum,
3803fa6f616SDavid Ahern 				       const int dif, const int sdif);
381e48c414eSArnaldo Carvalho de Melo 
3820f495f76SLorenz Bauer typedef u32 (inet_ehashfn_t)(const struct net *net,
3830f495f76SLorenz Bauer 			      const __be32 laddr, const __u16 lport,
3840f495f76SLorenz Bauer 			      const __be32 faddr, const __be16 fport);
3850f495f76SLorenz Bauer 
3860f495f76SLorenz Bauer inet_ehashfn_t inet_ehashfn;
3870f495f76SLorenz Bauer 
3880f495f76SLorenz Bauer INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
3890f495f76SLorenz Bauer 
390ce796e60SLorenz Bauer struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
391ce796e60SLorenz Bauer 				   struct sk_buff *skb, int doff,
392ce796e60SLorenz Bauer 				   __be32 saddr, __be16 sport,
3930f495f76SLorenz Bauer 				   __be32 daddr, unsigned short hnum,
3940f495f76SLorenz Bauer 				   inet_ehashfn_t *ehashfn);
395ce796e60SLorenz Bauer 
3966c886db2SLorenz Bauer struct sock *inet_lookup_run_sk_lookup(struct net *net,
3976c886db2SLorenz Bauer 				       int protocol,
3986c886db2SLorenz Bauer 				       struct sk_buff *skb, int doff,
3996c886db2SLorenz Bauer 				       __be32 saddr, __be16 sport,
4006c886db2SLorenz Bauer 				       __be32 daddr, u16 hnum, const int dif,
4016c886db2SLorenz Bauer 				       inet_ehashfn_t *ehashfn);
4026c886db2SLorenz Bauer 
4038f491069SHerbert Xu static inline struct sock *
inet_lookup_established(struct net * net,struct inet_hashinfo * hashinfo,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,const int dif)404c67499c0SPavel Emelyanov 	inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
405fb99c848SAl Viro 				const __be32 saddr, const __be16 sport,
406fb99c848SAl Viro 				const __be32 daddr, const __be16 dport,
407e48c414eSArnaldo Carvalho de Melo 				const int dif)
408e48c414eSArnaldo Carvalho de Melo {
409c67499c0SPavel Emelyanov 	return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
4103fa6f616SDavid Ahern 					 ntohs(dport), dif, 0);
4118f491069SHerbert Xu }
4128f491069SHerbert Xu 
/* Look a socket up in the established table first and fall back to the
 * listener table.
 *
 * *@refcounted is set to true when the socket came from
 * __inet_lookup_established() and to false when the result (possibly NULL)
 * comes from __inet_lookup_listener().
 */
static inline struct sock *__inet_lookup(struct net *net,
					 struct inet_hashinfo *hashinfo,
					 struct sk_buff *skb, int doff,
					 const __be32 saddr, const __be16 sport,
					 const __be32 daddr, const __be16 dport,
					 const int dif, const int sdif,
					 bool *refcounted)
{
	const u16 hnum = ntohs(dport);
	struct sock *est_sk;

	est_sk = __inet_lookup_established(net, hashinfo, saddr, sport,
					   daddr, hnum, dif, sdif);
	if (est_sk) {
		*refcounted = true;
		return est_sk;
	}

	*refcounted = false;
	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
				      sport, daddr, hnum, dif, sdif);
}
433e48c414eSArnaldo Carvalho de Melo 
inet_lookup(struct net * net,struct inet_hashinfo * hashinfo,struct sk_buff * skb,int doff,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,const int dif)434c67499c0SPavel Emelyanov static inline struct sock *inet_lookup(struct net *net,
435c67499c0SPavel Emelyanov 				       struct inet_hashinfo *hashinfo,
436a583636aSCraig Gallek 				       struct sk_buff *skb, int doff,
437fb99c848SAl Viro 				       const __be32 saddr, const __be16 sport,
438fb99c848SAl Viro 				       const __be32 daddr, const __be16 dport,
439e48c414eSArnaldo Carvalho de Melo 				       const int dif)
440e48c414eSArnaldo Carvalho de Melo {
441e48c414eSArnaldo Carvalho de Melo 	struct sock *sk;
4423b24d854SEric Dumazet 	bool refcounted;
443e48c414eSArnaldo Carvalho de Melo 
444a583636aSCraig Gallek 	sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
4453fa6f616SDavid Ahern 			   dport, dif, 0, &refcounted);
446e48c414eSArnaldo Carvalho de Melo 
44741c6d650SReshetova, Elena 	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
4483b24d854SEric Dumazet 		sk = NULL;
449e48c414eSArnaldo Carvalho de Melo 	return sk;
450e48c414eSArnaldo Carvalho de Melo }
451a7f5e7f1SArnaldo Carvalho de Melo 
4529c02bec9SLorenz Bauer static inline
inet_steal_sock(struct net * net,struct sk_buff * skb,int doff,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,bool * refcounted,inet_ehashfn_t * ehashfn)4539c02bec9SLorenz Bauer struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
4549c02bec9SLorenz Bauer 			     const __be32 saddr, const __be16 sport,
4559c02bec9SLorenz Bauer 			     const __be32 daddr, const __be16 dport,
4569c02bec9SLorenz Bauer 			     bool *refcounted, inet_ehashfn_t *ehashfn)
4579c02bec9SLorenz Bauer {
4589c02bec9SLorenz Bauer 	struct sock *sk, *reuse_sk;
4599c02bec9SLorenz Bauer 	bool prefetched;
4609c02bec9SLorenz Bauer 
4619c02bec9SLorenz Bauer 	sk = skb_steal_sock(skb, refcounted, &prefetched);
4629c02bec9SLorenz Bauer 	if (!sk)
4639c02bec9SLorenz Bauer 		return NULL;
4649c02bec9SLorenz Bauer 
465*8897562fSLorenz Bauer 	if (!prefetched || !sk_fullsock(sk))
4669c02bec9SLorenz Bauer 		return sk;
4679c02bec9SLorenz Bauer 
4689c02bec9SLorenz Bauer 	if (sk->sk_protocol == IPPROTO_TCP) {
4699c02bec9SLorenz Bauer 		if (sk->sk_state != TCP_LISTEN)
4709c02bec9SLorenz Bauer 			return sk;
4719c02bec9SLorenz Bauer 	} else if (sk->sk_protocol == IPPROTO_UDP) {
4729c02bec9SLorenz Bauer 		if (sk->sk_state != TCP_CLOSE)
4739c02bec9SLorenz Bauer 			return sk;
4749c02bec9SLorenz Bauer 	} else {
4759c02bec9SLorenz Bauer 		return sk;
4769c02bec9SLorenz Bauer 	}
4779c02bec9SLorenz Bauer 
4789c02bec9SLorenz Bauer 	reuse_sk = inet_lookup_reuseport(net, sk, skb, doff,
4799c02bec9SLorenz Bauer 					 saddr, sport, daddr, ntohs(dport),
4809c02bec9SLorenz Bauer 					 ehashfn);
4819c02bec9SLorenz Bauer 	if (!reuse_sk)
4829c02bec9SLorenz Bauer 		return sk;
4839c02bec9SLorenz Bauer 
4849c02bec9SLorenz Bauer 	/* We've chosen a new reuseport sock which is never refcounted. This
4859c02bec9SLorenz Bauer 	 * implies that sk also isn't refcounted.
4869c02bec9SLorenz Bauer 	 */
4879c02bec9SLorenz Bauer 	WARN_ON_ONCE(*refcounted);
4889c02bec9SLorenz Bauer 
4899c02bec9SLorenz Bauer 	return reuse_sk;
4909c02bec9SLorenz Bauer }
4919c02bec9SLorenz Bauer 
/* Find the socket for @skb.
 *
 * The netns is taken from the skb's dst device and the addresses from its
 * IP header.  A socket obtained through inet_steal_sock() is preferred;
 * an error pointer from it is turned into NULL.  Only when no socket was
 * stolen is __inet_lookup() run, with inet_iif(skb) as dif.
 */
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
					     struct sk_buff *skb,
					     int doff,
					     const __be16 sport,
					     const __be16 dport,
					     const int sdif,
					     bool *refcounted)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *sk;

	sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr,
			     dport, refcounted, inet_ehashfn);
	if (IS_ERR(sk))
		return NULL;

	if (!sk)
		sk = __inet_lookup(net, hashinfo, skb, doff,
				   iph->saddr, sport, iph->daddr, dport,
				   inet_iif(skb), sdif, refcounted);

	return sk;
}
5169a1f27c4SArnaldo Carvalho de Melo 
sk_daddr_set(struct sock * sk,__be32 addr)517d1e559d0SEric Dumazet static inline void sk_daddr_set(struct sock *sk, __be32 addr)
518d1e559d0SEric Dumazet {
519d1e559d0SEric Dumazet 	sk->sk_daddr = addr; /* alias of inet_daddr */
520d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
521d1e559d0SEric Dumazet 	ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr);
522d1e559d0SEric Dumazet #endif
523d1e559d0SEric Dumazet }
524d1e559d0SEric Dumazet 
sk_rcv_saddr_set(struct sock * sk,__be32 addr)525d1e559d0SEric Dumazet static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
526d1e559d0SEric Dumazet {
527d1e559d0SEric Dumazet 	sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */
528d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
529d1e559d0SEric Dumazet 	ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr);
530d1e559d0SEric Dumazet #endif
531d1e559d0SEric Dumazet }
5325b441f76SEric Dumazet 
5331fd51155SJoe Perches int __inet_hash_connect(struct inet_timewait_death_row *death_row,
534b2d05756SWilly Tarreau 			struct sock *sk, u64 port_offset,
5355ee31fc1SPavel Emelyanov 			int (*check_established)(struct inet_timewait_death_row *,
5361fd51155SJoe Perches 						 struct sock *, __u16,
537b4d6444eSEric Dumazet 						 struct inet_timewait_sock **));
5389327f705SEric Dumazet 
5391fd51155SJoe Perches int inet_hash_connect(struct inet_timewait_death_row *death_row,
540a7f5e7f1SArnaldo Carvalho de Melo 		      struct sock *sk);
541304a1618SArnaldo Carvalho de Melo #endif /* _INET_HASHTABLES_H */
542