/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 * Authors:	Lotsa people, from code originally in tcp
 */
9304a1618SArnaldo Carvalho de Melo
10304a1618SArnaldo Carvalho de Melo #ifndef _INET_HASHTABLES_H
11304a1618SArnaldo Carvalho de Melo #define _INET_HASHTABLES_H
12304a1618SArnaldo Carvalho de Melo
138feaf0c0SArnaldo Carvalho de Melo
142d8c4ce5SArnaldo Carvalho de Melo #include <linux/interrupt.h>
159a1f27c4SArnaldo Carvalho de Melo #include <linux/ip.h>
1633b62231SArnaldo Carvalho de Melo #include <linux/ipv6.h>
1777d8bf9cSArnaldo Carvalho de Melo #include <linux/list.h>
1877d8bf9cSArnaldo Carvalho de Melo #include <linux/slab.h>
1933b62231SArnaldo Carvalho de Melo #include <linux/socket.h>
2077d8bf9cSArnaldo Carvalho de Melo #include <linux/spinlock.h>
21304a1618SArnaldo Carvalho de Melo #include <linux/types.h>
22f3f05f70SArnaldo Carvalho de Melo #include <linux/wait.h>
23304a1618SArnaldo Carvalho de Melo
24463c84b9SArnaldo Carvalho de Melo #include <net/inet_connection_sock.h>
2514c85021SArnaldo Carvalho de Melo #include <net/inet_sock.h>
2628044fc1SJoanne Koong #include <net/ip.h>
272d8c4ce5SArnaldo Carvalho de Melo #include <net/sock.h>
289a1f27c4SArnaldo Carvalho de Melo #include <net/route.h>
29c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
300b441916SPavel Emelyanov #include <net/netns/hash.h>
312d8c4ce5SArnaldo Carvalho de Melo
3241c6d650SReshetova, Elena #include <linux/refcount.h>
33e48c414eSArnaldo Carvalho de Melo #include <asm/byteorder.h>
34f3f05f70SArnaldo Carvalho de Melo
3577d8bf9cSArnaldo Carvalho de Melo /* This is for all connections with a full identity, no wildcards.
3605dbc7b5SEric Dumazet * The 'e' prefix stands for Establish, but we really put all sockets
3705dbc7b5SEric Dumazet * but LISTEN ones.
3877d8bf9cSArnaldo Carvalho de Melo */
3977d8bf9cSArnaldo Carvalho de Melo struct inet_ehash_bucket {
403ab5aee7SEric Dumazet struct hlist_nulls_head chain;
416d255361SEric Dumazet };
4277d8bf9cSArnaldo Carvalho de Melo
/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 *	1) Sockets bound to different interfaces may share a local port.
 *	   Failing that, goto test 2.
 *	2) If all sockets have sk->sk_reuse set, and none of them are in
 *	   TCP_LISTEN state, the port may be shared.
 *	   Failing that, goto test 3.
 *	3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
 *	   address, and none of them are the same, the port may be
 *	   shared.
 *	   Failing this, the port cannot be shared.
 *
 * The interesting point, is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
 * The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit, if it is set and the socket trying to bind has
 * sk->sk_reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (ie. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)	-DaveM
 */

/* NOTE(review): presumably the values kept in
 * inet_bind_bucket::fastreuseport; the code that assigns them is not
 * part of this header - confirm against the users.
 */
#define FASTREUSEPORT_ANY	1
#define FASTREUSEPORT_STRICT	2
76637bc8bbSJosef Bacik
7777d8bf9cSArnaldo Carvalho de Melo struct inet_bind_bucket {
780c5c9fb5SEric W. Biederman possible_net_t ib_net;
793c82a21fSRobert Shearman int l3mdev;
8077d8bf9cSArnaldo Carvalho de Melo unsigned short port;
81da5e3630STom Herbert signed char fastreuse;
82da5e3630STom Herbert signed char fastreuseport;
83da5e3630STom Herbert kuid_t fastuid;
84637bc8bbSJosef Bacik #if IS_ENABLED(CONFIG_IPV6)
85637bc8bbSJosef Bacik struct in6_addr fast_v6_rcv_saddr;
86637bc8bbSJosef Bacik #endif
87637bc8bbSJosef Bacik __be32 fast_rcv_saddr;
88637bc8bbSJosef Bacik unsigned short fast_sk_family;
89637bc8bbSJosef Bacik bool fast_ipv6_only;
9077d8bf9cSArnaldo Carvalho de Melo struct hlist_node node;
9177d8bf9cSArnaldo Carvalho de Melo struct hlist_head owners;
9277d8bf9cSArnaldo Carvalho de Melo };
9377d8bf9cSArnaldo Carvalho de Melo
9428044fc1SJoanne Koong struct inet_bind2_bucket {
9528044fc1SJoanne Koong possible_net_t ib_net;
9628044fc1SJoanne Koong int l3mdev;
9728044fc1SJoanne Koong unsigned short port;
985456262dSMartin KaFai Lau #if IS_ENABLED(CONFIG_IPV6)
995456262dSMartin KaFai Lau unsigned short family;
1005456262dSMartin KaFai Lau #endif
10128044fc1SJoanne Koong union {
10228044fc1SJoanne Koong #if IS_ENABLED(CONFIG_IPV6)
10328044fc1SJoanne Koong struct in6_addr v6_rcv_saddr;
10428044fc1SJoanne Koong #endif
10528044fc1SJoanne Koong __be32 rcv_saddr;
10628044fc1SJoanne Koong };
10728044fc1SJoanne Koong /* Node in the bhash2 inet_bind_hashbucket chain */
10828044fc1SJoanne Koong struct hlist_node node;
10928044fc1SJoanne Koong /* List of sockets hashed to this bucket */
11028044fc1SJoanne Koong struct hlist_head owners;
111936a192fSKuniyuki Iwashima /* bhash has twsk in owners, but bhash2 has twsk in
112936a192fSKuniyuki Iwashima * deathrow not to add a member in struct sock_common.
113936a192fSKuniyuki Iwashima */
114936a192fSKuniyuki Iwashima struct hlist_head deathrow;
11528044fc1SJoanne Koong };
11628044fc1SJoanne Koong
ib_net(const struct inet_bind_bucket * ib)11728044fc1SJoanne Koong static inline struct net *ib_net(const struct inet_bind_bucket *ib)
11828044fc1SJoanne Koong {
11928044fc1SJoanne Koong return read_pnet(&ib->ib_net);
12028044fc1SJoanne Koong }
12128044fc1SJoanne Koong
ib2_net(const struct inet_bind2_bucket * ib)12228044fc1SJoanne Koong static inline struct net *ib2_net(const struct inet_bind2_bucket *ib)
1237a9546eeSEric Dumazet {
1247a9546eeSEric Dumazet return read_pnet(&ib->ib_net);
1257a9546eeSEric Dumazet }
1267a9546eeSEric Dumazet
/* Iterate @tb over every bucket linked on @head through its 'node'
 * member.
 */
#define inet_bind_bucket_for_each(tb, head) \
	hlist_for_each_entry(tb, head, node)
12977d8bf9cSArnaldo Carvalho de Melo
13077d8bf9cSArnaldo Carvalho de Melo struct inet_bind_hashbucket {
13177d8bf9cSArnaldo Carvalho de Melo spinlock_t lock;
13277d8bf9cSArnaldo Carvalho de Melo struct hlist_head chain;
13377d8bf9cSArnaldo Carvalho de Melo };
13477d8bf9cSArnaldo Carvalho de Melo
1358dbd76e7SEric Dumazet /* Sockets can be hashed in established or listening table.
1368dbd76e7SEric Dumazet * We must use different 'nulls' end-of-chain value for all hash buckets :
1378dbd76e7SEric Dumazet * A socket might transition from ESTABLISH to LISTEN state without
1388dbd76e7SEric Dumazet * RCU grace period. A lookup in ehash table needs to handle this case.
139c25eb3bfSEric Dumazet */
1408dbd76e7SEric Dumazet #define LISTENING_NULLS_BASE (1U << 29)
1415caea4eaSEric Dumazet struct inet_listen_hashbucket {
1425caea4eaSEric Dumazet spinlock_t lock;
1438dbd76e7SEric Dumazet struct hlist_nulls_head nulls_head;
1448dbd76e7SEric Dumazet };
1455caea4eaSEric Dumazet
/* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
14877d8bf9cSArnaldo Carvalho de Melo
14977d8bf9cSArnaldo Carvalho de Melo struct inet_hashinfo {
15077d8bf9cSArnaldo Carvalho de Melo /* This is for sockets with full identity only. Sockets here will
15177d8bf9cSArnaldo Carvalho de Melo * always be without wildcards and will have the following invariant:
15277d8bf9cSArnaldo Carvalho de Melo *
15377d8bf9cSArnaldo Carvalho de Melo * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
15477d8bf9cSArnaldo Carvalho de Melo *
15577d8bf9cSArnaldo Carvalho de Melo */
15677d8bf9cSArnaldo Carvalho de Melo struct inet_ehash_bucket *ehash;
1579db66bdcSEric Dumazet spinlock_t *ehash_locks;
158f373b53bSEric Dumazet unsigned int ehash_mask;
159230140cfSEric Dumazet unsigned int ehash_locks_mask;
16077d8bf9cSArnaldo Carvalho de Melo
16177d8bf9cSArnaldo Carvalho de Melo /* Ok, let's try this, I give up, we do need a local binding
16277d8bf9cSArnaldo Carvalho de Melo * TCP hash as well as the others for fast bind/connect.
16377d8bf9cSArnaldo Carvalho de Melo */
1645caea4eaSEric Dumazet struct kmem_cache *bind_bucket_cachep;
16528044fc1SJoanne Koong /* This bind table is hashed by local port */
16661b7c691SMartin KaFai Lau struct inet_bind_hashbucket *bhash;
16728044fc1SJoanne Koong struct kmem_cache *bind2_bucket_cachep;
16828044fc1SJoanne Koong /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4)
16928044fc1SJoanne Koong * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used
17028044fc1SJoanne Koong * primarily for expediting bind conflict resolution.
17128044fc1SJoanne Koong */
17228044fc1SJoanne Koong struct inet_bind_hashbucket *bhash2;
17361b7c691SMartin KaFai Lau unsigned int bhash_size;
17461b7c691SMartin KaFai Lau
17561b7c691SMartin KaFai Lau /* The 2nd listener table hashed by local port and address */
17661b7c691SMartin KaFai Lau unsigned int lhash2_mask;
17761b7c691SMartin KaFai Lau struct inet_listen_hashbucket *lhash2;
178d1e5e640SKuniyuki Iwashima
179d1e5e640SKuniyuki Iwashima bool pernet;
1806f5ca184SEric Dumazet } ____cacheline_aligned_in_smp;
18177d8bf9cSArnaldo Carvalho de Melo
tcp_or_dccp_get_hashinfo(const struct sock * sk)182429e42c1SKuniyuki Iwashima static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
183429e42c1SKuniyuki Iwashima {
184429e42c1SKuniyuki Iwashima #if IS_ENABLED(CONFIG_IP_DCCP)
185429e42c1SKuniyuki Iwashima return sk->sk_prot->h.hashinfo ? :
186429e42c1SKuniyuki Iwashima sock_net(sk)->ipv4.tcp_death_row.hashinfo;
187429e42c1SKuniyuki Iwashima #else
188429e42c1SKuniyuki Iwashima return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
189429e42c1SKuniyuki Iwashima #endif
190429e42c1SKuniyuki Iwashima }
191429e42c1SKuniyuki Iwashima
19261b7c691SMartin KaFai Lau static inline struct inet_listen_hashbucket *
inet_lhash2_bucket(struct inet_hashinfo * h,u32 hash)19361b7c691SMartin KaFai Lau inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
19461b7c691SMartin KaFai Lau {
19561b7c691SMartin KaFai Lau return &h->lhash2[hash & h->lhash2_mask];
19661b7c691SMartin KaFai Lau }
19761b7c691SMartin KaFai Lau
inet_ehash_bucket(struct inet_hashinfo * hashinfo,unsigned int hash)19881c3d547SEric Dumazet static inline struct inet_ehash_bucket *inet_ehash_bucket(
19981c3d547SEric Dumazet struct inet_hashinfo *hashinfo,
20081c3d547SEric Dumazet unsigned int hash)
20181c3d547SEric Dumazet {
202f373b53bSEric Dumazet return &hashinfo->ehash[hash & hashinfo->ehash_mask];
203304a1618SArnaldo Carvalho de Melo }
204304a1618SArnaldo Carvalho de Melo
inet_ehash_lockp(struct inet_hashinfo * hashinfo,unsigned int hash)2059db66bdcSEric Dumazet static inline spinlock_t *inet_ehash_lockp(
206230140cfSEric Dumazet struct inet_hashinfo *hashinfo,
207230140cfSEric Dumazet unsigned int hash)
208230140cfSEric Dumazet {
209230140cfSEric Dumazet return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
210230140cfSEric Dumazet }
211230140cfSEric Dumazet
/* NOTE(review): defined outside this header; presumably sets up
 * hashinfo->ehash_locks, which inet_ehash_locks_free() releases - confirm.
 */
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
213230140cfSEric Dumazet
inet_hashinfo2_free_mod(struct inet_hashinfo * h)214c96b6accSWang Hai static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
215c96b6accSWang Hai {
216c96b6accSWang Hai kfree(h->lhash2);
217c96b6accSWang Hai h->lhash2 = NULL;
218c96b6accSWang Hai }
219c96b6accSWang Hai
inet_ehash_locks_free(struct inet_hashinfo * hashinfo)220230140cfSEric Dumazet static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
221230140cfSEric Dumazet {
222095dc8e0SEric Dumazet kvfree(hashinfo->ehash_locks);
223230140cfSEric Dumazet hashinfo->ehash_locks = NULL;
224230140cfSEric Dumazet }
225230140cfSEric Dumazet
/* Allocation and release of a hashinfo; both are defined outside this
 * header.
 */
struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
						 unsigned int ehash_entries);
void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo);
229d1e5e640SKuniyuki Iwashima
2301fd51155SJoe Perches struct inet_bind_bucket *
2311fd51155SJoe Perches inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
23277d8bf9cSArnaldo Carvalho de Melo struct inet_bind_hashbucket *head,
2333c82a21fSRobert Shearman const unsigned short snum, int l3mdev);
2341fd51155SJoe Perches void inet_bind_bucket_destroy(struct kmem_cache *cachep,
23577d8bf9cSArnaldo Carvalho de Melo struct inet_bind_bucket *tb);
23677d8bf9cSArnaldo Carvalho de Melo
23728044fc1SJoanne Koong bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
23828044fc1SJoanne Koong const struct net *net, unsigned short port,
23928044fc1SJoanne Koong int l3mdev);
24028044fc1SJoanne Koong
24128044fc1SJoanne Koong struct inet_bind2_bucket *
24228044fc1SJoanne Koong inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
24328044fc1SJoanne Koong struct inet_bind_hashbucket *head,
24428044fc1SJoanne Koong unsigned short port, int l3mdev,
24528044fc1SJoanne Koong const struct sock *sk);
24628044fc1SJoanne Koong
24728044fc1SJoanne Koong void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
24828044fc1SJoanne Koong struct inet_bind2_bucket *tb);
24928044fc1SJoanne Koong
25028044fc1SJoanne Koong struct inet_bind2_bucket *
25128044fc1SJoanne Koong inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
25228044fc1SJoanne Koong const struct net *net,
25328044fc1SJoanne Koong unsigned short port, int l3mdev,
25428044fc1SJoanne Koong const struct sock *sk);
25528044fc1SJoanne Koong
25628044fc1SJoanne Koong bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
25728044fc1SJoanne Koong const struct net *net, unsigned short port,
25828044fc1SJoanne Koong int l3mdev, const struct sock *sk);
25928044fc1SJoanne Koong
/* Compute the bind-table index for local port @lport in namespace @net:
 * the port is mixed with net_hash_mix(@net) and reduced with
 * (@bhash_size - 1).
 *
 * NOTE(review): the mask form only spreads over the whole table when
 * @bhash_size is a power of two - presumably guaranteed by whoever
 * sizes the table; confirm.
 */
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
			       const u32 bhash_size)
{
	return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
26577d8bf9cSArnaldo Carvalho de Melo
26628044fc1SJoanne Koong static inline struct inet_bind_hashbucket *
inet_bhashfn_portaddr(const struct inet_hashinfo * hinfo,const struct sock * sk,const struct net * net,unsigned short port)26728044fc1SJoanne Koong inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
26828044fc1SJoanne Koong const struct net *net, unsigned short port)
26928044fc1SJoanne Koong {
27028044fc1SJoanne Koong u32 hash;
27128044fc1SJoanne Koong
27228044fc1SJoanne Koong #if IS_ENABLED(CONFIG_IPV6)
27328044fc1SJoanne Koong if (sk->sk_family == AF_INET6)
27428044fc1SJoanne Koong hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
27528044fc1SJoanne Koong else
27628044fc1SJoanne Koong #endif
27728044fc1SJoanne Koong hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
27828044fc1SJoanne Koong return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
27928044fc1SJoanne Koong }
28028044fc1SJoanne Koong
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);

/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or
 * sk_v6_rcv_saddr (ipv6) changes after it has been bound. The socket's
 * rcv_saddr field should already have been updated when this is called.
 */
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family);
void inet_bhash2_reset_saddr(struct sock *sk);

void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    struct inet_bind2_bucket *tb2, unsigned short port);
2932d8c4ce5SArnaldo Carvalho de Melo
2942d8c4ce5SArnaldo Carvalho de Melo /* Caller must disable local BH processing. */
2951ce31c9eSEric Dumazet int __inet_inherit_port(const struct sock *sk, struct sock *child);
2962d8c4ce5SArnaldo Carvalho de Melo
2971fd51155SJoe Perches void inet_put_port(struct sock *sk);
2982d8c4ce5SArnaldo Carvalho de Melo
29961b7c691SMartin KaFai Lau void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
30061b7c691SMartin KaFai Lau unsigned long numentries, int scale,
30161b7c691SMartin KaFai Lau unsigned long low_limit,
30261b7c691SMartin KaFai Lau unsigned long high_limit);
303c92c81dfSPeter Oskolkov int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
304f3f05f70SArnaldo Carvalho de Melo
30501770a16SRicardo Dias bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
30601770a16SRicardo Dias bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
30701770a16SRicardo Dias bool *found_dup_sk);
308fe38d2a1SJosef Bacik int __inet_hash(struct sock *sk, struct sock *osk);
309086c653fSCraig Gallek int inet_hash(struct sock *sk);
3101fd51155SJoe Perches void inet_unhash(struct sock *sk);
31133b62231SArnaldo Carvalho de Melo
3121fd51155SJoe Perches struct sock *__inet_lookup_listener(struct net *net,
313c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo,
314a583636aSCraig Gallek struct sk_buff *skb, int doff,
3151fd51155SJoe Perches const __be32 saddr, const __be16 sport,
316fb99c848SAl Viro const __be32 daddr,
3178f491069SHerbert Xu const unsigned short hnum,
3183fa6f616SDavid Ahern const int dif, const int sdif);
3198f491069SHerbert Xu
/* Convenience wrapper around __inet_lookup_listener() for callers that
 * hold the destination port in network byte order: @dport is converted
 * with ntohs() and every other argument is forwarded unchanged.
 */
static inline struct sock *inet_lookup_listener(struct net *net,
		struct inet_hashinfo *hashinfo,
		struct sk_buff *skb, int doff,
		__be32 saddr, __be16 sport,
		__be32 daddr, __be16 dport, int dif, int sdif)
{
	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
				      daddr, ntohs(dport), dif, sdif);
}
3298feaf0c0SArnaldo Carvalho de Melo
/* Socket demux engine toys. */
/* What happens here is ugly; there's a pair of adjacent fields in
   struct inet_sock; __be16 dport followed by __u16 num.  We want to
   search by pair, so we combine the keys into a single 32bit value
   and compare with 32bit value read from &...->dport.  Let's at least
   make sure that it's not mixed with anything else...
   On 64bit targets we combine comparisons with pair of adjacent __be32
   fields in the same way.
*/
/* The half that lands in the upper 16 bits depends on host endianness:
 * __sport on big-endian, __dport on little-endian.
 */
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif
3468feaf0c0SArnaldo Carvalho de Melo
/* Declare a const __addrpair called __name that packs both 32-bit
 * addresses into one 64-bit value.  The half that lands in the upper
 * 32 bits depends on host endianness: __saddr on big-endian, __daddr
 * on little-endian.
 */
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__saddr)) << 32) | \
				   ((__force __u64)(__be32)(__daddr)))
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__daddr)) << 32) | \
				   ((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
358c7228317SJoe Perches
inet_match(struct net * net,const struct sock * sk,const __addrpair cookie,const __portpair ports,int dif,int sdif)359eda090c3SEric Dumazet static inline bool inet_match(struct net *net, const struct sock *sk,
3604915d50eSEric Dumazet const __addrpair cookie, const __portpair ports,
3614915d50eSEric Dumazet int dif, int sdif)
3624915d50eSEric Dumazet {
3634915d50eSEric Dumazet if (!net_eq(sock_net(sk), net) ||
3644915d50eSEric Dumazet sk->sk_portpair != ports ||
3654915d50eSEric Dumazet sk->sk_addrpair != cookie)
3664915d50eSEric Dumazet return false;
3674915d50eSEric Dumazet
368944fd1aeSMike Manning /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
369944fd1aeSMike Manning return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
370944fd1aeSMike Manning sdif);
3714915d50eSEric Dumazet }
372e48c414eSArnaldo Carvalho de Melo
373ee3cf32aSEric Dumazet /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
374e48c414eSArnaldo Carvalho de Melo * not check it for lookups anymore, thanks Alexey. -DaveM
375e48c414eSArnaldo Carvalho de Melo */
3761fd51155SJoe Perches struct sock *__inet_lookup_established(struct net *net,
377c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo,
378fb99c848SAl Viro const __be32 saddr, const __be16 sport,
3791fd51155SJoe Perches const __be32 daddr, const u16 hnum,
3803fa6f616SDavid Ahern const int dif, const int sdif);
381e48c414eSArnaldo Carvalho de Melo
3820f495f76SLorenz Bauer typedef u32 (inet_ehashfn_t)(const struct net *net,
3830f495f76SLorenz Bauer const __be32 laddr, const __u16 lport,
3840f495f76SLorenz Bauer const __be32 faddr, const __be16 fport);
3850f495f76SLorenz Bauer
3860f495f76SLorenz Bauer inet_ehashfn_t inet_ehashfn;
3870f495f76SLorenz Bauer
3880f495f76SLorenz Bauer INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
3890f495f76SLorenz Bauer
390ce796e60SLorenz Bauer struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
391ce796e60SLorenz Bauer struct sk_buff *skb, int doff,
392ce796e60SLorenz Bauer __be32 saddr, __be16 sport,
3930f495f76SLorenz Bauer __be32 daddr, unsigned short hnum,
3940f495f76SLorenz Bauer inet_ehashfn_t *ehashfn);
395ce796e60SLorenz Bauer
3966c886db2SLorenz Bauer struct sock *inet_lookup_run_sk_lookup(struct net *net,
3976c886db2SLorenz Bauer int protocol,
3986c886db2SLorenz Bauer struct sk_buff *skb, int doff,
3996c886db2SLorenz Bauer __be32 saddr, __be16 sport,
4006c886db2SLorenz Bauer __be32 daddr, u16 hnum, const int dif,
4016c886db2SLorenz Bauer inet_ehashfn_t *ehashfn);
4026c886db2SLorenz Bauer
4038f491069SHerbert Xu static inline struct sock *
inet_lookup_established(struct net * net,struct inet_hashinfo * hashinfo,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,const int dif)404c67499c0SPavel Emelyanov inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
405fb99c848SAl Viro const __be32 saddr, const __be16 sport,
406fb99c848SAl Viro const __be32 daddr, const __be16 dport,
407e48c414eSArnaldo Carvalho de Melo const int dif)
408e48c414eSArnaldo Carvalho de Melo {
409c67499c0SPavel Emelyanov return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
4103fa6f616SDavid Ahern ntohs(dport), dif, 0);
4118f491069SHerbert Xu }
4128f491069SHerbert Xu
/* Look a socket up in the established table first and fall back to the
 * listener lookup.
 *
 * *@refcounted is set to true when the established lookup produced the
 * result and to false when the listener lookup is used instead, so the
 * caller must inspect it before deciding how to treat the socket's
 * reference count (see inet_lookup()).
 *
 * Returns the socket found, or whatever __inet_lookup_listener()
 * returns when nothing is found in the established table.
 */
static inline struct sock *__inet_lookup(struct net *net,
					 struct inet_hashinfo *hashinfo,
					 struct sk_buff *skb, int doff,
					 const __be32 saddr, const __be16 sport,
					 const __be32 daddr, const __be16 dport,
					 const int dif, const int sdif,
					 bool *refcounted)
{
	/* Both lookups take the local port in host byte order. */
	u16 hnum = ntohs(dport);
	struct sock *sk;

	sk = __inet_lookup_established(net, hashinfo, saddr, sport,
				       daddr, hnum, dif, sdif);
	*refcounted = true;
	if (sk)
		return sk;
	*refcounted = false;
	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
				      sport, daddr, hnum, dif, sdif);
}
433e48c414eSArnaldo Carvalho de Melo
inet_lookup(struct net * net,struct inet_hashinfo * hashinfo,struct sk_buff * skb,int doff,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,const int dif)434c67499c0SPavel Emelyanov static inline struct sock *inet_lookup(struct net *net,
435c67499c0SPavel Emelyanov struct inet_hashinfo *hashinfo,
436a583636aSCraig Gallek struct sk_buff *skb, int doff,
437fb99c848SAl Viro const __be32 saddr, const __be16 sport,
438fb99c848SAl Viro const __be32 daddr, const __be16 dport,
439e48c414eSArnaldo Carvalho de Melo const int dif)
440e48c414eSArnaldo Carvalho de Melo {
441e48c414eSArnaldo Carvalho de Melo struct sock *sk;
4423b24d854SEric Dumazet bool refcounted;
443e48c414eSArnaldo Carvalho de Melo
444a583636aSCraig Gallek sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
4453fa6f616SDavid Ahern dport, dif, 0, &refcounted);
446e48c414eSArnaldo Carvalho de Melo
44741c6d650SReshetova, Elena if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
4483b24d854SEric Dumazet sk = NULL;
449e48c414eSArnaldo Carvalho de Melo return sk;
450e48c414eSArnaldo Carvalho de Melo }
451a7f5e7f1SArnaldo Carvalho de Melo
4529c02bec9SLorenz Bauer static inline
inet_steal_sock(struct net * net,struct sk_buff * skb,int doff,const __be32 saddr,const __be16 sport,const __be32 daddr,const __be16 dport,bool * refcounted,inet_ehashfn_t * ehashfn)4539c02bec9SLorenz Bauer struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
4549c02bec9SLorenz Bauer const __be32 saddr, const __be16 sport,
4559c02bec9SLorenz Bauer const __be32 daddr, const __be16 dport,
4569c02bec9SLorenz Bauer bool *refcounted, inet_ehashfn_t *ehashfn)
4579c02bec9SLorenz Bauer {
4589c02bec9SLorenz Bauer struct sock *sk, *reuse_sk;
4599c02bec9SLorenz Bauer bool prefetched;
4609c02bec9SLorenz Bauer
4619c02bec9SLorenz Bauer sk = skb_steal_sock(skb, refcounted, &prefetched);
4629c02bec9SLorenz Bauer if (!sk)
4639c02bec9SLorenz Bauer return NULL;
4649c02bec9SLorenz Bauer
465*8897562fSLorenz Bauer if (!prefetched || !sk_fullsock(sk))
4669c02bec9SLorenz Bauer return sk;
4679c02bec9SLorenz Bauer
4689c02bec9SLorenz Bauer if (sk->sk_protocol == IPPROTO_TCP) {
4699c02bec9SLorenz Bauer if (sk->sk_state != TCP_LISTEN)
4709c02bec9SLorenz Bauer return sk;
4719c02bec9SLorenz Bauer } else if (sk->sk_protocol == IPPROTO_UDP) {
4729c02bec9SLorenz Bauer if (sk->sk_state != TCP_CLOSE)
4739c02bec9SLorenz Bauer return sk;
4749c02bec9SLorenz Bauer } else {
4759c02bec9SLorenz Bauer return sk;
4769c02bec9SLorenz Bauer }
4779c02bec9SLorenz Bauer
4789c02bec9SLorenz Bauer reuse_sk = inet_lookup_reuseport(net, sk, skb, doff,
4799c02bec9SLorenz Bauer saddr, sport, daddr, ntohs(dport),
4809c02bec9SLorenz Bauer ehashfn);
4819c02bec9SLorenz Bauer if (!reuse_sk)
4829c02bec9SLorenz Bauer return sk;
4839c02bec9SLorenz Bauer
4849c02bec9SLorenz Bauer /* We've chosen a new reuseport sock which is never refcounted. This
4859c02bec9SLorenz Bauer * implies that sk also isn't refcounted.
4869c02bec9SLorenz Bauer */
4879c02bec9SLorenz Bauer WARN_ON_ONCE(*refcounted);
4889c02bec9SLorenz Bauer
4899c02bec9SLorenz Bauer return reuse_sk;
4909c02bec9SLorenz Bauer }
4919c02bec9SLorenz Bauer
/* Find the socket for @skb.
 *
 * The namespace comes from the device of the skb's dst entry and the
 * addresses from its IP header.  A socket obtained through
 * inet_steal_sock() wins; an error pointer from it is turned into
 * NULL.  Only when no socket was stolen is the regular __inet_lookup()
 * performed, with inet_iif(@skb) as the incoming interface.
 *
 * *@refcounted is filled in by whichever of the two paths ran last.
 *
 * NOTE(review): no return path visible in inet_steal_sock() builds an
 * ERR_PTR itself, so an error pointer can only originate from the
 * helpers it calls - confirm.
 */
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
					     struct sk_buff *skb,
					     int doff,
					     const __be16 sport,
					     const __be16 dport,
					     const int sdif,
					     bool *refcounted)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *sk;

	sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport,
			     refcounted, inet_ehashfn);
	if (IS_ERR(sk))
		return NULL;
	if (sk)
		return sk;

	return __inet_lookup(net, hashinfo, skb,
			     doff, iph->saddr, sport,
			     iph->daddr, dport, inet_iif(skb), sdif,
			     refcounted);
}
5169a1f27c4SArnaldo Carvalho de Melo
sk_daddr_set(struct sock * sk,__be32 addr)517d1e559d0SEric Dumazet static inline void sk_daddr_set(struct sock *sk, __be32 addr)
518d1e559d0SEric Dumazet {
519d1e559d0SEric Dumazet sk->sk_daddr = addr; /* alias of inet_daddr */
520d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
521d1e559d0SEric Dumazet ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr);
522d1e559d0SEric Dumazet #endif
523d1e559d0SEric Dumazet }
524d1e559d0SEric Dumazet
sk_rcv_saddr_set(struct sock * sk,__be32 addr)525d1e559d0SEric Dumazet static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
526d1e559d0SEric Dumazet {
527d1e559d0SEric Dumazet sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */
528d1e559d0SEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
529d1e559d0SEric Dumazet ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr);
530d1e559d0SEric Dumazet #endif
531d1e559d0SEric Dumazet }
5325b441f76SEric Dumazet
5331fd51155SJoe Perches int __inet_hash_connect(struct inet_timewait_death_row *death_row,
534b2d05756SWilly Tarreau struct sock *sk, u64 port_offset,
5355ee31fc1SPavel Emelyanov int (*check_established)(struct inet_timewait_death_row *,
5361fd51155SJoe Perches struct sock *, __u16,
537b4d6444eSEric Dumazet struct inet_timewait_sock **));
5389327f705SEric Dumazet
5391fd51155SJoe Perches int inet_hash_connect(struct inet_timewait_death_row *death_row,
540a7f5e7f1SArnaldo Carvalho de Melo struct sock *sk);
541304a1618SArnaldo Carvalho de Melo #endif /* _INET_HASHTABLES_H */
542