// SPDX-License-Identifier: GPL-2.0
/*
 * To speed up listener socket lookup, create an array to store all sockets
 * listening on the same port. This allows a decision to be made after finding
 * the first socket. An optional BPF program can also be configured for
 * selecting the socket index from the array of available sockets.
 */
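/* A userspace sketch (illustrative only, not part of this file) of how a
 * group is formed: each process sets SO_REUSEPORT before bind(), and the
 * kernel links the sockets into one sock_reuseport group. The sockaddr
 * setup in addr is assumed.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	int one = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, SOMAXCONN);
 */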

#include <net/ip.h>
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
#include <linux/filter.h>
#include <linux/rcupdate.h>

#define INIT_SOCKS 128

DEFINE_SPINLOCK(reuseport_lock);

static DEFINE_IDA(reuseport_ida);
static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
			       struct sock_reuseport *reuse, bool bind_inany);

void reuseport_has_conns_set(struct sock *sk)
{
	struct sock_reuseport *reuse;

	if (!rcu_access_pointer(sk->sk_reuseport_cb))
		return;

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	if (likely(reuse))
		reuse->has_conns = 1;
	spin_unlock_bh(&reuseport_lock);
}
EXPORT_SYMBOL(reuseport_has_conns_set);

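/* reuse->incoming_cpu counts the sockets in the group that have expressed a
 * CPU preference (sk->sk_incoming_cpu >= 0); it is not a CPU id. A non-zero
 * count makes reuseport_select_sock_by_hash() prefer a socket bound to the
 * current CPU.
 */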
static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse)
{
	/* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
	WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1);
}

static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse)
{
	/* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
	WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1);
}

static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
{
	if (sk->sk_incoming_cpu >= 0)
		__reuseport_get_incoming_cpu(reuse);
}

static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
{
	if (sk->sk_incoming_cpu >= 0)
		__reuseport_put_incoming_cpu(reuse);
}

void reuseport_update_incoming_cpu(struct sock *sk, int val)
{
	struct sock_reuseport *reuse;
	int old_sk_incoming_cpu;

	if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) {
		/* Paired with READ_ONCE() in sk_incoming_cpu_update()
		 * and compute_score().
		 */
		WRITE_ONCE(sk->sk_incoming_cpu, val);
		return;
	}

	spin_lock_bh(&reuseport_lock);

	/* This must be done under reuseport_lock to avoid a race with
	 * reuseport_grow(), which accesses sk->sk_incoming_cpu without
	 * lock_sock() when detaching a shutdown()ed sk.
	 *
	 * Paired with READ_ONCE() in reuseport_select_sock_by_hash().
	 */
	old_sk_incoming_cpu = sk->sk_incoming_cpu;
	WRITE_ONCE(sk->sk_incoming_cpu, val);

	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));

	/* reuseport_grow() has detached a closed sk. */
	if (!reuse)
		goto out;

	if (old_sk_incoming_cpu < 0 && val >= 0)
		__reuseport_get_incoming_cpu(reuse);
	else if (old_sk_incoming_cpu >= 0 && val < 0)
		__reuseport_put_incoming_cpu(reuse);

out:
	spin_unlock_bh(&reuseport_lock);
}

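/* The socks[] array holds two sections: listening (not-closed) sockets in
 * slots [0, num_socks), and shutdown()ed but still tracked sockets in slots
 * [max_socks - num_closed_socks, max_socks). The gap in between is free
 * space for new entries.
 */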
static int reuseport_sock_index(struct sock *sk,
				const struct sock_reuseport *reuse,
				bool closed)
{
	int left, right;

	if (!closed) {
		left = 0;
		right = reuse->num_socks;
	} else {
		left = reuse->max_socks - reuse->num_closed_socks;
		right = reuse->max_socks;
	}

	for (; left < right; left++)
		if (reuse->socks[left] == sk)
			return left;
	return -1;
}

static void __reuseport_add_sock(struct sock *sk,
				 struct sock_reuseport *reuse)
{
	reuse->socks[reuse->num_socks] = sk;
	/* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
	smp_wmb();
	reuse->num_socks++;
	reuseport_get_incoming_cpu(sk, reuse);
}

static bool __reuseport_detach_sock(struct sock *sk,
				    struct sock_reuseport *reuse)
{
	int i = reuseport_sock_index(sk, reuse, false);

	if (i == -1)
		return false;

	reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
	reuse->num_socks--;
	reuseport_put_incoming_cpu(sk, reuse);

	return true;
}

static void __reuseport_add_closed_sock(struct sock *sk,
					struct sock_reuseport *reuse)
{
	reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
	reuseport_get_incoming_cpu(sk, reuse);
}

static bool __reuseport_detach_closed_sock(struct sock *sk,
					   struct sock_reuseport *reuse)
{
	int i = reuseport_sock_index(sk, reuse, true);

	if (i == -1)
		return false;

	reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
	reuseport_put_incoming_cpu(sk, reuse);

	return true;
}

static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
	unsigned int size = sizeof(struct sock_reuseport) +
			    sizeof(struct sock *) * max_socks;
	struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);

	if (!reuse)
		return NULL;

	reuse->max_socks = max_socks;

	RCU_INIT_POINTER(reuse->prog, NULL);
	return reuse;
}

int reuseport_alloc(struct sock *sk, bool bind_inany)
{
	struct sock_reuseport *reuse;
	int id, ret = 0;

	/* bh lock used since this function call may precede hlist lock in
	 * soft irq of receive path or setsockopt from process context
	 */
	spin_lock_bh(&reuseport_lock);

	/* Allocation attempts can occur concurrently via the setsockopt path
	 * and the bind/hash path.  Nothing to do when we lose the race.
	 */
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	if (reuse) {
		if (reuse->num_closed_socks) {
			/* sk was shutdown()ed before */
			ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
			goto out;
		}

		/* Only set reuse->bind_inany if the bind_inany is true.
		 * Otherwise, it will overwrite the reuse->bind_inany
		 * which was set by the bind/hash path.
		 */
		if (bind_inany)
			reuse->bind_inany = bind_inany;
		goto out;
	}

	reuse = __reuseport_alloc(INIT_SOCKS);
	if (!reuse) {
		ret = -ENOMEM;
		goto out;
	}

	id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
	if (id < 0) {
		kfree(reuse);
		ret = id;
		goto out;
	}

	reuse->reuseport_id = id;
	reuse->bind_inany = bind_inany;
	reuse->socks[0] = sk;
	reuse->num_socks = 1;
	reuseport_get_incoming_cpu(sk, reuse);
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

out:
	spin_unlock_bh(&reuseport_lock);

	return ret;
}
EXPORT_SYMBOL(reuseport_alloc);

static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
{
	struct sock_reuseport *more_reuse;
	u32 more_socks_size, i;

	more_socks_size = reuse->max_socks * 2U;
	if (more_socks_size > U16_MAX) {
		if (reuse->num_closed_socks) {
			/* Make room by removing a closed sk.
			 * The child has already been migrated.
			 * Only reqsk left at this point.
			 */
			struct sock *sk;

			sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
			RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
			__reuseport_detach_closed_sock(sk, reuse);

			return reuse;
		}

		return NULL;
	}

	more_reuse = __reuseport_alloc(more_socks_size);
	if (!more_reuse)
		return NULL;

	more_reuse->num_socks = reuse->num_socks;
	more_reuse->num_closed_socks = reuse->num_closed_socks;
	more_reuse->prog = reuse->prog;
	more_reuse->reuseport_id = reuse->reuseport_id;
	more_reuse->bind_inany = reuse->bind_inany;
	more_reuse->has_conns = reuse->has_conns;
	more_reuse->incoming_cpu = reuse->incoming_cpu;

	memcpy(more_reuse->socks, reuse->socks,
	       reuse->num_socks * sizeof(struct sock *));
	memcpy(more_reuse->socks +
	       (more_reuse->max_socks - more_reuse->num_closed_socks),
	       reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
	       reuse->num_closed_socks * sizeof(struct sock *));
	more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);

	for (i = 0; i < reuse->max_socks; ++i)
		rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
				   more_reuse);

	/* Note: we use kfree_rcu here instead of reuseport_free_rcu so
	 * that reuse and more_reuse can temporarily share a reference
	 * to prog.
	 */
	kfree_rcu(reuse, rcu);
	return more_reuse;
}

static void reuseport_free_rcu(struct rcu_head *head)
{
	struct sock_reuseport *reuse;

	reuse = container_of(head, struct sock_reuseport, rcu);
	sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
	ida_free(&reuseport_ida, reuse->reuseport_id);
	kfree(reuse);
}

/**
 * reuseport_add_sock - Add a socket to the reuseport group of another.
 * @sk:  New socket to add to the group.
 * @sk2: Socket belonging to the existing reuseport group.
 * @bind_inany: Whether or not the group is bound to a local INANY address.
 *
 * May return ENOMEM and not add socket to group under memory pressure.
 */
int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
{
	struct sock_reuseport *old_reuse, *reuse;

	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
		int err = reuseport_alloc(sk2, bind_inany);

		if (err)
			return err;
	}

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					      lockdep_is_held(&reuseport_lock));
	if (old_reuse && old_reuse->num_closed_socks) {
		/* sk was shutdown()ed before */
		int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);

		spin_unlock_bh(&reuseport_lock);
		return err;
	}

	if (old_reuse && old_reuse->num_socks != 1) {
		spin_unlock_bh(&reuseport_lock);
		return -EBUSY;
	}

	if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
		reuse = reuseport_grow(reuse);
		if (!reuse) {
			spin_unlock_bh(&reuseport_lock);
			return -ENOMEM;
		}
	}

	__reuseport_add_sock(sk, reuse);
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

	spin_unlock_bh(&reuseport_lock);

	if (old_reuse)
		call_rcu(&old_reuse->rcu, reuseport_free_rcu);
	return 0;
}
EXPORT_SYMBOL(reuseport_add_sock);

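/* Move a shutdown()ed sk out of the closed section and back into active
 * service: either back into its old group (old_reuse == reuse), into the
 * group @reuse that another socket has meanwhile created on the same port,
 * or into a freshly allocated group when no listener is left (reuse == NULL).
 */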
static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
			       struct sock_reuseport *reuse, bool bind_inany)
{
	if (old_reuse == reuse) {
		/* If sk was in the same reuseport group, just pop sk out of
		 * the closed section and push sk into the listening section.
		 */
		__reuseport_detach_closed_sock(sk, old_reuse);
		__reuseport_add_sock(sk, old_reuse);
		return 0;
	}

	if (!reuse) {
		/* In bind()/listen() path, we cannot carry over the eBPF prog
		 * for the shutdown()ed socket. In setsockopt() path, we should
		 * not change the eBPF prog of listening sockets by attaching a
		 * prog to the shutdown()ed socket. Thus, we will allocate a new
		 * reuseport group and detach sk from the old group.
		 */
		int id;

		reuse = __reuseport_alloc(INIT_SOCKS);
		if (!reuse)
			return -ENOMEM;

		id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
		if (id < 0) {
			kfree(reuse);
			return id;
		}

		reuse->reuseport_id = id;
		reuse->bind_inany = bind_inany;
	} else {
		/* Move sk from the old group to the new one if
		 * - all the other listeners in the old group were close()d or
		 *   shutdown()ed, and then sk2 has listen()ed on the same port
		 * OR
		 * - sk listen()ed without bind() (or with autobind), was
		 *   shutdown()ed, and then listen()s on another port which
		 *   sk2 listen()s on.
		 */
		if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
			reuse = reuseport_grow(reuse);
			if (!reuse)
				return -ENOMEM;
		}
	}

	__reuseport_detach_closed_sock(sk, old_reuse);
	__reuseport_add_sock(sk, reuse);
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

	if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
		call_rcu(&old_reuse->rcu, reuseport_free_rcu);

	return 0;
}

void reuseport_detach_sock(struct sock *sk)
{
	struct sock_reuseport *reuse;

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));

	/* reuseport_grow() has detached a closed sk */
	if (!reuse)
		goto out;

	/* Notify the bpf side. The sk may be added to a sockarray
	 * map. If so, sockarray logic will remove it from the map.
	 *
	 * Other bpf map types that work with reuseport, like sockmap,
	 * don't need an explicit callback from here. They override sk
	 * unhash/close ops to remove the sk from the map before we
	 * get to this point.
	 */
	bpf_sk_reuseport_detach(sk);

	rcu_assign_pointer(sk->sk_reuseport_cb, NULL);

	if (!__reuseport_detach_closed_sock(sk, reuse))
		__reuseport_detach_sock(sk, reuse);

	if (reuse->num_socks + reuse->num_closed_socks == 0)
		call_rcu(&reuse->rcu, reuseport_free_rcu);

out:
	spin_unlock_bh(&reuseport_lock);
}
EXPORT_SYMBOL(reuseport_detach_sock);

void reuseport_stop_listen_sock(struct sock *sk)
{
	if (sk->sk_protocol == IPPROTO_TCP) {
		struct sock_reuseport *reuse;
		struct bpf_prog *prog;

		spin_lock_bh(&reuseport_lock);

		reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
						  lockdep_is_held(&reuseport_lock));
		prog = rcu_dereference_protected(reuse->prog,
						 lockdep_is_held(&reuseport_lock));

		if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
		    (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
			/* Migration capable, move sk from the listening section
			 * to the closed section.
			 */
			bpf_sk_reuseport_detach(sk);

			__reuseport_detach_sock(sk, reuse);
			__reuseport_add_closed_sock(sk, reuse);

			spin_unlock_bh(&reuseport_lock);
			return;
		}

		spin_unlock_bh(&reuseport_lock);
	}

	/* Not capable of migration, detach immediately */
	reuseport_detach_sock(sk);
}
EXPORT_SYMBOL(reuseport_stop_listen_sock);

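/* Run a socket-filter style program (attached via SO_ATTACH_REUSEPORT_CBPF
 * or SO_ATTACH_REUSEPORT_EBPF); its return value is interpreted as an index
 * into the listening section of socks[].
 */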
static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
				   struct bpf_prog *prog, struct sk_buff *skb,
				   int hdr_len)
{
	struct sk_buff *nskb = NULL;
	u32 index;

	if (skb_shared(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return NULL;
		skb = nskb;
	}

	/* temporarily advance data past protocol header */
	if (!pskb_pull(skb, hdr_len)) {
		kfree_skb(nskb);
		return NULL;
	}
	index = bpf_prog_run_save_cb(prog, skb);
	__skb_push(skb, hdr_len);

	consume_skb(nskb);

	if (index >= socks)
		return NULL;

	return reuse->socks[index];
}

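/* Hash-based fallback selection. Walk the listening section starting from
 * the hash-chosen slot; when any socket in the group has a CPU preference
 * (reuse->incoming_cpu != 0), prefer one pinned to the current CPU, else
 * fall back to the first non-ESTABLISHED candidate found.
 */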
static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
						  u32 hash, u16 num_socks)
{
	struct sock *first_valid_sk = NULL;
	int i, j;

	i = j = reciprocal_scale(hash, num_socks);
	do {
		struct sock *sk = reuse->socks[i];

		if (sk->sk_state != TCP_ESTABLISHED) {
			/* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */
			if (!READ_ONCE(reuse->incoming_cpu))
				return sk;

			/* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). */
			if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
				return sk;

			if (!first_valid_sk)
				first_valid_sk = sk;
		}

		i++;
		if (i >= num_socks)
			i = 0;
	} while (i != j);

	return first_valid_sk;
}

/**
 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
 * @sk: First socket in the group.
 * @hash: When no BPF filter is available, use this hash to select.
 * @skb: skb to run through BPF filter.
 * @hdr_len: BPF filter expects skb data pointer at payload data.  If
 * the skb does not yet point at the payload, this parameter represents
 * how far the pointer needs to advance to reach the payload.
 * Returns a socket that should receive the packet (or NULL on error).
 */
struct sock *reuseport_select_sock(struct sock *sk,
				   u32 hash,
				   struct sk_buff *skb,
				   int hdr_len)
{
	struct sock_reuseport *reuse;
	struct bpf_prog *prog;
	struct sock *sk2 = NULL;
	u16 socks;

	rcu_read_lock();
	reuse = rcu_dereference(sk->sk_reuseport_cb);

	/* if memory allocation failed or add call is not yet complete */
	if (!reuse)
		goto out;

	prog = rcu_dereference(reuse->prog);
	socks = READ_ONCE(reuse->num_socks);
	if (likely(socks)) {
		/* paired with smp_wmb() in __reuseport_add_sock() */
		smp_rmb();

		if (!prog || !skb)
			goto select_by_hash;

		if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
		else
			sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);

select_by_hash:
		/* no bpf or invalid bpf result: fall back to hash usage */
		if (!sk2)
			sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
	}

out:
	rcu_read_unlock();
	return sk2;
}
EXPORT_SYMBOL(reuseport_select_sock);

/**
 * reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
 * @sk: close()ed or shutdown()ed socket in the group.
 * @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
 * NEW_SYN_RECV request socket during 3WHS.
 * @skb: skb to run through BPF filter.
 * Returns a socket (with sk_refcnt +1) that should accept the child socket
 * (or NULL on error).
 */
struct sock *reuseport_migrate_sock(struct sock *sk,
				    struct sock *migrating_sk,
				    struct sk_buff *skb)
{
	struct sock_reuseport *reuse;
	struct sock *nsk = NULL;
	bool allocated = false;
	struct bpf_prog *prog;
	u16 socks;
	u32 hash;

	rcu_read_lock();

	reuse = rcu_dereference(sk->sk_reuseport_cb);
	if (!reuse)
		goto out;

	socks = READ_ONCE(reuse->num_socks);
	if (unlikely(!socks))
		goto failure;

	/* paired with smp_wmb() in __reuseport_add_sock() */
	smp_rmb();

	hash = migrating_sk->sk_hash;
	prog = rcu_dereference(reuse->prog);
	if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
		if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
			goto select_by_hash;
		goto failure;
	}

	if (!skb) {
		skb = alloc_skb(0, GFP_ATOMIC);
		if (!skb)
			goto failure;
		allocated = true;
	}

	nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);

	if (allocated)
		kfree_skb(skb);

select_by_hash:
	if (!nsk)
		nsk = reuseport_select_sock_by_hash(reuse, hash, socks);

	if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
		nsk = NULL;
		goto failure;
	}

out:
	rcu_read_unlock();
	return nsk;

failure:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
	goto out;
}
EXPORT_SYMBOL(reuseport_migrate_sock);

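/* Install @prog as the group's selection program, replacing any old one.
 * A userspace sketch (illustrative only) of how this path is reached:
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
 *		   &prog_fd, sizeof(prog_fd));
 *
 * where prog_fd is assumed to refer to an already loaded BPF program.
 */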
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
	struct sock_reuseport *reuse;
	struct bpf_prog *old_prog;

	if (sk_unhashed(sk)) {
		int err;

		if (!sk->sk_reuseport)
			return -EINVAL;

		err = reuseport_alloc(sk, false);
		if (err)
			return err;
	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
		/* The socket wasn't bound with SO_REUSEPORT */
		return -EINVAL;
	}

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	old_prog = rcu_dereference_protected(reuse->prog,
					     lockdep_is_held(&reuseport_lock));
	rcu_assign_pointer(reuse->prog, prog);
	spin_unlock_bh(&reuseport_lock);

	sk_reuseport_prog_free(old_prog);
	return 0;
}
EXPORT_SYMBOL(reuseport_attach_prog);

int reuseport_detach_prog(struct sock *sk)
{
	struct sock_reuseport *reuse;
	struct bpf_prog *old_prog;

	old_prog = NULL;
	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));

	/* reuse must be checked after acquiring the reuseport_lock
	 * because reuseport_grow() can detach a closed sk.
	 */
	if (!reuse) {
		spin_unlock_bh(&reuseport_lock);
		return sk->sk_reuseport ? -ENOENT : -EINVAL;
	}

	if (sk_unhashed(sk) && reuse->num_closed_socks) {
		spin_unlock_bh(&reuseport_lock);
		return -ENOENT;
	}

	old_prog = rcu_replace_pointer(reuse->prog, old_prog,
				       lockdep_is_held(&reuseport_lock));
	spin_unlock_bh(&reuseport_lock);

	if (!old_prog)
		return -ENOENT;

	sk_reuseport_prog_free(old_prog);
	return 0;
}
EXPORT_SYMBOL(reuseport_detach_prog);