// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha2.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

#include <trace/events/mptcp.h>
#include <trace/events/sock.h>

static void mptcp_subflow_ops_undo_override(struct sock *ssk);

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p\n", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
}

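/* Compute an HMAC-SHA256 over the two peers' nonces, keyed with the
 * concatenation of key1 and key2; this authenticates the MP_JOIN
 * handshake (RFC 8684). Callers use either a 64-bit truncation (thmac,
 * carried in the SYN-ACK) or the leftmost 160 bits (in the third ACK).
 */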
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       ((mptcp_pm_is_userspace(msk) &&
		 mptcp_userspace_pm_active(msk)) ||
		READ_ONCE(msk->pm.accept_subflow));
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req)
{
	struct mptcp_sock *msk = subflow_req->msk;
	u8 hmac[SHA256_DIGEST_SIZE];

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
}

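/* Look up the msk owning the token carried by an MP_JOIN SYN; on success,
 * ask the path manager for a local address id (and backup flag) for the
 * new subflow. Returns the msk with a reference held, or NULL.
 */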
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;
	subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req);

	return msk;
}

static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
	subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);
}

static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
{
	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
}

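/* Stash an MP_TCPRST reason code in an MPTCP skb extension, so that the
 * code building the outgoing TCP reset can advertise it as an option.
 */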
static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
{
	struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP);

	if (mpext) {
		memset(mpext, 0, sizeof(*mpext));
		mpext->reset_reason = reason;
	}
}

static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb)
{
	SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT);
	subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
	return -EPERM;
}

/* Validate an incoming mptcp request socket.
 *
 * Returns an error code if a JOIN has failed and a TCP reset
 * should be sent.
 */
static int subflow_check_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	bool opt_mp_capable, opt_mp_join;

	pr_debug("subflow_req=%p, listener=%p\n", subflow_req, listener);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	mptcp_get_options(skb, &mp_opt);

	opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN);
	opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN);
	if (opt_mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (unlikely(listener->pm_listener))
			return subflow_reset_req_endp(req, skb);
		if (opt_mp_join)
			return 0;
	} else if (opt_mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);

		if (mp_opt.backup)
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
	} else if (unlikely(listener->pm_listener)) {
		return subflow_reset_req_endp(req, skb);
	}

	if (opt_mp_capable && listener->request_mptcp) {
		int err, retries = MPTCP_TOKEN_MAX_RETRIES;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
			} else {
				subflow_req->mp_capable = 1;
			}
			return 0;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;
		else
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);

	} else if (opt_mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req);

		/* Can't fall back to TCP in this case. */
		if (!subflow_req->msk) {
			subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
			return -EPERM;
		}

		if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
			pr_debug("syn inet_sport=%d %d\n",
				 ntohs(inet_sk(sk_listener)->inet_sport),
				 ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
			if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
				return -EPERM;
			}
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
		}

		subflow_req_create_thmac(subflow_req);

		if (unlikely(req->syncookie)) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
			else
				return -EPERM;
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p\n", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}

	return 0;
}

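/* Called by the syncookie code to rebuild the MPTCP request state that
 * was not kept around while the SYN was answered statelessly.
 */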
int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	bool opt_mp_capable, opt_mp_join;
	int err;

	subflow_init_req(req, sk_listener);
	mptcp_get_options(skb, &mp_opt);

	opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK);
	opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK);
	if (opt_mp_capable && opt_mp_join)
		return -EINVAL;

	if (opt_mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (opt_mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		subflow_req->mp_join = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

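/* route_req() hook: beyond routing, validate the MPTCP options carried by
 * the SYN; on failure release the route and, unless syncookies are in
 * use, answer with a TCP reset.
 */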
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;
	subflow_init_req(req, sk);

	dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_check_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp_request_sock_ops.send_reset(sk, skb);
	return NULL;
}

static void subflow_prep_synack(const struct sock *sk, struct request_sock *req,
				struct tcp_fastopen_cookie *foc,
				enum tcp_synack_type synack_type)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_request_sock *ireq = inet_rsk(req);

	/* clear tstamp_ok, as needed depending on cookie */
	if (foc && foc->len > -1)
		ireq->tstamp_ok = 0;

	if (synack_type == TCP_SYNACK_FASTOPEN)
		mptcp_fastopen_subflow_synack_set_params(subflow, req);
}

static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
				  struct flowi *fl,
				  struct request_sock *req,
				  struct tcp_fastopen_cookie *foc,
				  enum tcp_synack_type synack_type,
				  struct sk_buff *syn_skb)
{
	subflow_prep_synack(sk, req, foc, synack_type);

	return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc,
						     synack_type, syn_skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
				  struct flowi *fl,
				  struct request_sock *req,
				  struct tcp_fastopen_cookie *foc,
				  enum tcp_synack_type synack_type,
				  struct sk_buff *syn_skb)
{
	subflow_prep_synack(sk, req, foc, synack_type);

	return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc,
						     synack_type, syn_skb);
}

static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;
	subflow_init_req(req, sk);

	dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_check_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp6_request_sock_ops.send_reset(sk, skb);
	return NULL;
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token, thmac, subflow->thmac);

	return thmac == subflow->thmac;
}

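/* Forcibly close a subflow: send a TCP reset on it, mark it done and let
 * the msk worker dispose of it.
 */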
void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	/* mptcp_mp_fail_no_response() can reach here on an already closed
	 * socket
	 */
	if (ssk->sk_state == TCP_CLOSE)
		return;

	/* must hold: tcp_done() could drop last reference on parent */
	sock_hold(sk);

	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
		mptcp_schedule_work(sk);

	sock_put(sk);
}

static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk)
{
	return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
}

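/* Propagate the state of the first subflow to the msk: sync the send
 * buffer and receive space and, on active open completion (TCP_SYN_SENT),
 * initialize the msk sequence numbers from the subflow idsn before moving
 * the msk to the new state.
 */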
void __mptcp_sync_state(struct sock *sk, int state)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk = msk->first;

	subflow = mptcp_subflow_ctx(ssk);
	__mptcp_propagate_sndbuf(sk, ssk);
	if (!msk->rcvspace_init)
		mptcp_rcv_space_init(msk, ssk);

	if (sk->sk_state == TCP_SYN_SENT) {
		/* subflow->idsn is always available in TCP_SYN_SENT state,
		 * even for the FASTOPEN scenarios
		 */
		WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
		WRITE_ONCE(msk->snd_nxt, msk->write_seq);
		mptcp_set_state(sk, state);
		sk->sk_state_change(sk);
	}
}

static void subflow_set_remote_key(struct mptcp_sock *msk,
				   struct mptcp_subflow_context *subflow,
				   const struct mptcp_options_received *mp_opt)
{
	/* active MPC subflow will reach here multiple times:
	 * at subflow_finish_connect() time and at 4th ack time
	 */
	if (subflow->remote_key_valid)
		return;

	subflow->remote_key_valid = 1;
	subflow->remote_key = mp_opt->sndr_key;
	mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
	subflow->iasn++;

	WRITE_ONCE(msk->remote_key, subflow->remote_key);
	WRITE_ONCE(msk->ack_seq, subflow->iasn);
	WRITE_ONCE(msk->can_ack, true);
	atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}

static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
				  struct mptcp_subflow_context *subflow,
				  const struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	mptcp_data_lock(sk);
	if (mp_opt) {
		/* Options are available only in the non-fallback cases;
		 * avoid updating rx path fields otherwise
		 */
		WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
		WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
		subflow_set_remote_key(msk, subflow, mp_opt);
	}

	if (!sock_owned_by_user(sk)) {
		__mptcp_sync_state(sk, ssk->sk_state);
	} else {
		msk->pending_state = ssk->sk_state;
		__set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
	}
	mptcp_data_unlock(sk);
}

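/* Hooked in place of ->sk_rx_dst_set(): runs when the active-open subflow
 * receives the SYN-ACK, completing the MP_CAPABLE or MP_JOIN handshake or
 * falling back to plain TCP.
 */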
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	msk = mptcp_sk(parent);
	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x\n", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(msk);
			goto fallback;
		}

		if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
			WRITE_ONCE(msk->csum_enabled, true);
		if (mp_opt.deny_join_id0)
			WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
		subflow->mp_capable = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
		mptcp_finish_connect(sk);
		mptcp_propagate_state(parent, sk, subflow, &mp_opt);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) {
			subflow->reset_reason = MPTCP_RST_EMPTCP;
			goto do_reset;
		}

		subflow->backup = mp_opt.backup;
		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d\n",
			 subflow, subflow->thmac, subflow->remote_nonce,
			 subflow->backup);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			subflow->reset_reason = MPTCP_RST_EMPTCP;
			goto do_reset;
		}

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);

		if (subflow->backup)
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);

		if (subflow_use_different_dport(msk, sk)) {
			pr_debug("synack inet_dport=%d %d\n",
				 ntohs(inet_sk(sk)->inet_dport),
				 ntohs(inet_sk(parent)->inet_dport));
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
		}
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_propagate_state(parent, sk, subflow, NULL);
	}
	return;

do_reset:
	subflow->reset_transient = 0;
	mptcp_subflow_reset(sk);
}

static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
	WARN_ON_ONCE(local_id < 0 || local_id > 255);
	WRITE_ONCE(subflow->local_id, local_id);
}

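/* The subflow local address id is assigned lazily, on the first
 * ->rebuild_header() invocation: fetch it from the path manager and cache
 * it in the subflow context.
 */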
static int subflow_chk_local_id(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	int err;

	if (likely(subflow->local_id >= 0))
		return 0;

	err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
	if (err < 0)
		return err;

	subflow_set_local_id(subflow, err);
	subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk);

	return 0;
}

static int subflow_rebuild_header(struct sock *sk)
{
	int err = subflow_chk_local_id(sk);

	if (unlikely(err < 0))
		return err;

	return inet_sk_rebuild_header(sk);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static int subflow_v6_rebuild_header(struct sock *sk)
{
	int err = subflow_chk_local_id(sk);

	if (unlikely(err < 0))
		return err;

	return inet6_sk_rebuild_header(sk);
}
#endif

static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p\n", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

static void subflow_v4_req_destructor(struct request_sock *req)
{
	subflow_req_destructor(req);
	tcp_request_sock_ops.destructor(req);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init;
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
static struct proto tcpv6_prot_override __ro_after_init;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p\n", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void subflow_v6_req_destructor(struct request_sock *req)
{
	subflow_req_destructor(req);
	tcp6_request_sock_ops.destructor(req);
}
#endif

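/* Allocate a TCP request socket using the MPTCP-specific request_sock_ops
 * for the given family, so that the MPTCP-aware destructor runs on free.
 */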
struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
					       struct sock *sk_listener,
					       bool attach_listener)
{
	if (ops->family == AF_INET)
		ops = &mptcp_subflow_v4_request_sock_ops;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (ops->family == AF_INET6)
		ops = &mptcp_subflow_v6_request_sock_ops;
#endif

	return inet_reqsk_alloc(ops, sk_listener, attach_listener);
}
EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc);

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;

	mptcp_subflow_ops_undo_override(sk);
}

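/* Detach and free the MPTCP context of a subflow; if the ULP is still
 * attached, also revert the socket to plain TCP and drop the msk
 * reference held by the context.
 */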
void mptcp_subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	list_del(&mptcp_subflow_ctx(ssk)->node);
	if (inet_csk(ssk)->icsk_ulp_ops) {
		subflow_ulp_fallback(ssk, ctx);
		if (ctx->conn)
			sock_put(ctx->conn);
	}

	kfree_rcu(ctx, rcu);
}

void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
				       struct mptcp_subflow_context *subflow,
				       const struct mptcp_options_received *mp_opt)
{
	subflow_set_remote_key(msk, subflow, mp_opt);
	subflow->fully_established = 1;
	WRITE_ONCE(msk->fully_established, true);

	if (subflow->is_mptfo)
		__mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}

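/* syn_recv_sock() for MPTCP listeners: create the child TCP socket and,
 * for a valid MP_CAPABLE handshake, a new msk owning it; for MP_JOIN,
 * attach the child to the msk matching the token. Falls back to plain
 * TCP, or resets the child, when the handshake checks fail.
 */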
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct mptcp_sock *owner;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p\n", listener, req, listener->conn);

	/* After child creation we must look for MPC even when options
	 * are not parsed
	 */
	mp_opt.suboptions = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		/* we can receive and accept an in-window, out-of-order pkt,
		 * which may not carry the MP_CAPABLE opt even on mptcp enabled
		 * paths: always try to extract the peer key, and fallback
		 * for packets missing it.
		 * Even OoO DSS packets coming legitimately after dropped or
		 * reordered MPC will cause fallback, but we don't have other
		 * options.
		 */
		mptcp_get_options(skb, &mp_opt);
		if (!(mp_opt.suboptions &
		      (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK)))
			fallback = true;

	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
		    !subflow_hmac_valid(req, &mp_opt) ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fallback on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal) {
				subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
				goto dispose_child;
			}
			goto fallback;
		}

		/* ssk inherits options of listener sk */
		ctx->setsockopt_seq = listener->setsockopt_seq;

		if (ctx->mp_capable) {
			ctx->conn = mptcp_sk_clone_init(listener->conn, &mp_opt, child, req);
			if (!ctx->conn)
				goto fallback;

			ctx->subflow_id = 1;
			owner = mptcp_sk(ctx->conn);
			mptcp_pm_new_connection(owner, child, 1);

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
				mptcp_pm_fully_established(owner, child);
				ctx->pm_notified = 1;
			}
		} else if (ctx->mp_join) {
			owner = subflow_req->msk;
			if (!owner) {
				subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
				goto dispose_child;
			}

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;

			if (subflow_use_different_sport(owner, sk)) {
				pr_debug("ack inet_sport=%d %d\n",
					 ntohs(inet_sk(sk)->inet_sport),
					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
					goto dispose_child;
				}
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
			}

			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	mptcp_subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;

fallback:
	if (fallback)
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
	mptcp_subflow_drop_ctx(child);
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
static struct proto tcp_prot_override __ro_after_init;

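/* Outcome of validating the DSS mapping against the skb at the head of
 * the subflow receive queue.
 */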
enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY,
	MAPPING_BAD_CSUM
};

static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d\n",
		 ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (unlikely(skb_consumed >= skb->len)) {
		DEBUG_NET_WARN_ON_ONCE(1);
		return true;
	}

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		dbg_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers only data already consumed from the
		 * subflow stream: invalid
		 */
		dbg_bad_map(subflow, ssn);
		return false;
	}
	return true;
}

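/* When DSS checksums are in use, accumulate the checksum of the payload
 * covered by the current mapping across the receive queue and verify it
 * against the DSS pseudo-header (RFC 8684).
 */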
validate_data_csum(struct sock * ssk,struct sk_buff * skb,bool csum_reqd)988dd8bcd17SPaolo Abeni static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
989dd8bcd17SPaolo Abeni bool csum_reqd)
990dd8bcd17SPaolo Abeni {
991dd8bcd17SPaolo Abeni struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
992dd8bcd17SPaolo Abeni u32 offset, seq, delta;
993ba2c89e0SPaolo Abeni __sum16 csum;
994dd8bcd17SPaolo Abeni int len;
995dd8bcd17SPaolo Abeni
996dd8bcd17SPaolo Abeni if (!csum_reqd)
997dd8bcd17SPaolo Abeni return MAPPING_OK;
998dd8bcd17SPaolo Abeni
999dd8bcd17SPaolo Abeni /* mapping already validated on previous traversal */
1000dd8bcd17SPaolo Abeni if (subflow->map_csum_len == subflow->map_data_len)
1001dd8bcd17SPaolo Abeni return MAPPING_OK;
1002dd8bcd17SPaolo Abeni
1003dd8bcd17SPaolo Abeni /* traverse the receive queue, ensuring it contains a full
1004dd8bcd17SPaolo Abeni * DSS mapping and accumulating the related csum.
1005dd8bcd17SPaolo Abeni * Preserve the accoumlate csum across multiple calls, to compute
1006dd8bcd17SPaolo Abeni * the csum only once
1007dd8bcd17SPaolo Abeni */
1008dd8bcd17SPaolo Abeni delta = subflow->map_data_len - subflow->map_csum_len;
1009dd8bcd17SPaolo Abeni for (;;) {
1010dd8bcd17SPaolo Abeni seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
1011dd8bcd17SPaolo Abeni offset = seq - TCP_SKB_CB(skb)->seq;
1012dd8bcd17SPaolo Abeni
1013dd8bcd17SPaolo Abeni /* if the current skb has not been accounted yet, csum its contents
1014dd8bcd17SPaolo Abeni * up to the amount covered by the current DSS
1015dd8bcd17SPaolo Abeni */
1016dd8bcd17SPaolo Abeni if (offset < skb->len) {
1017dd8bcd17SPaolo Abeni __wsum csum;
1018dd8bcd17SPaolo Abeni
1019dd8bcd17SPaolo Abeni len = min(skb->len - offset, delta);
1020dd8bcd17SPaolo Abeni csum = skb_checksum(skb, offset, len, 0);
1021dd8bcd17SPaolo Abeni subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
1022dd8bcd17SPaolo Abeni subflow->map_csum_len);
1023dd8bcd17SPaolo Abeni
1024dd8bcd17SPaolo Abeni delta -= len;
1025dd8bcd17SPaolo Abeni subflow->map_csum_len += len;
1026dd8bcd17SPaolo Abeni }
1027dd8bcd17SPaolo Abeni if (delta == 0)
1028dd8bcd17SPaolo Abeni break;
1029dd8bcd17SPaolo Abeni
1030dd8bcd17SPaolo Abeni if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
1031dd8bcd17SPaolo Abeni /* if this subflow is closed, the partial mapping
1032dd8bcd17SPaolo Abeni * will never be completed; flush the pending skbs, so
1033dd8bcd17SPaolo Abeni * that subflow_sched_work_if_closed() can kick in
1034dd8bcd17SPaolo Abeni */
1035dd8bcd17SPaolo Abeni if (unlikely(ssk->sk_state == TCP_CLOSE))
1036dd8bcd17SPaolo Abeni while ((skb = skb_peek(&ssk->sk_receive_queue)))
1037dd8bcd17SPaolo Abeni sk_eat_skb(ssk, skb);
1038dd8bcd17SPaolo Abeni
1039dd8bcd17SPaolo Abeni /* not enough data to validate the csum */
1040dd8bcd17SPaolo Abeni return MAPPING_EMPTY;
1041dd8bcd17SPaolo Abeni }
1042dd8bcd17SPaolo Abeni
1043dd8bcd17SPaolo Abeni /* the DSS mapping for the next skbs will be validated later,
1044dd8bcd17SPaolo Abeni * when a get_mapping_status call processes such skbs
1045dd8bcd17SPaolo Abeni */
1046dd8bcd17SPaolo Abeni skb = skb->next;
1047dd8bcd17SPaolo Abeni }
1048dd8bcd17SPaolo Abeni
1049dd8bcd17SPaolo Abeni /* note that 'map_data_len' accounts only for the carried data and does
1050dd8bcd17SPaolo Abeni * not include the eventual seq increment due to the data fin,
1051dd8bcd17SPaolo Abeni * while the pseudo header requires the original DSS data len,
1052dd8bcd17SPaolo Abeni * including that increment
1053dd8bcd17SPaolo Abeni */
10548401e87fSGeliang Tang csum = __mptcp_make_csum(subflow->map_seq,
10558401e87fSGeliang Tang subflow->map_subflow_seq,
10568401e87fSGeliang Tang subflow->map_data_len + subflow->map_data_fin,
10578401e87fSGeliang Tang subflow->map_data_csum);
10588401e87fSGeliang Tang if (unlikely(csum)) {
1059fe3ab1cbSGeliang Tang MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
106031bf11deSPaolo Abeni return MAPPING_BAD_CSUM;
1061fe3ab1cbSGeliang Tang }
1062dd8bcd17SPaolo Abeni
1063ae66fb2bSMat Martineau subflow->valid_csum_seen = 1;
1064dd8bcd17SPaolo Abeni return MAPPING_OK;
1065dd8bcd17SPaolo Abeni }
1066dd8bcd17SPaolo Abeni
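/* update the subflow context with the mapping carried by the DSS option of
 * the skb at the head of the receive queue, if any: returns MAPPING_OK when
 * in-sequence mapped data is available, MAPPING_EMPTY when there is nothing
 * to process yet, MAPPING_DATA_FIN for a bare DATA_FIN, MAPPING_DUMMY after
 * a fallback, and MAPPING_INVALID/MAPPING_BAD_CSUM on protocol errors
 */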
get_mapping_status(struct sock * ssk,struct mptcp_sock * msk)106743b54c6eSMat Martineau static enum mapping_status get_mapping_status(struct sock *ssk,
106843b54c6eSMat Martineau struct mptcp_sock *msk)
1069648ef4b8SMat Martineau {
1070648ef4b8SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1071dd8bcd17SPaolo Abeni bool csum_reqd = READ_ONCE(msk->csum_enabled);
1072648ef4b8SMat Martineau struct mptcp_ext *mpext;
1073648ef4b8SMat Martineau struct sk_buff *skb;
1074648ef4b8SMat Martineau u16 data_len;
1075648ef4b8SMat Martineau u64 map_seq;
1076648ef4b8SMat Martineau
1077648ef4b8SMat Martineau skb = skb_peek(&ssk->sk_receive_queue);
1078648ef4b8SMat Martineau if (!skb)
1079648ef4b8SMat Martineau return MAPPING_EMPTY;
1080648ef4b8SMat Martineau
1081e1ff9e82SDavide Caratti if (mptcp_check_fallback(ssk))
1082e1ff9e82SDavide Caratti return MAPPING_DUMMY;
1083e1ff9e82SDavide Caratti
1084648ef4b8SMat Martineau mpext = mptcp_get_ext(skb);
1085648ef4b8SMat Martineau if (!mpext || !mpext->use_map) {
1086648ef4b8SMat Martineau if (!subflow->map_valid && !skb->len) {
1087648ef4b8SMat Martineau /* the TCP stack delivers 0 len FIN pkts to the receive
1088648ef4b8SMat Martineau * queue; those are the only 0 len pkts ever expected here,
1089648ef4b8SMat Martineau * and we can admit a missing mapping only for 0 len pkts
1090648ef4b8SMat Martineau */
1091648ef4b8SMat Martineau if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1092648ef4b8SMat Martineau WARN_ONCE(1, "0len seq %d:%d flags %x",
1093648ef4b8SMat Martineau TCP_SKB_CB(skb)->seq,
1094648ef4b8SMat Martineau TCP_SKB_CB(skb)->end_seq,
1095648ef4b8SMat Martineau TCP_SKB_CB(skb)->tcp_flags);
1096648ef4b8SMat Martineau sk_eat_skb(ssk, skb);
1097648ef4b8SMat Martineau return MAPPING_EMPTY;
1098648ef4b8SMat Martineau }
1099648ef4b8SMat Martineau
1100648ef4b8SMat Martineau if (!subflow->map_valid)
1101648ef4b8SMat Martineau return MAPPING_INVALID;
1102648ef4b8SMat Martineau
1103648ef4b8SMat Martineau goto validate_seq;
1104648ef4b8SMat Martineau }
1105648ef4b8SMat Martineau
11060918e34bSGeliang Tang trace_get_mapping_status(mpext);
1107648ef4b8SMat Martineau
1108648ef4b8SMat Martineau data_len = mpext->data_len;
1109648ef4b8SMat Martineau if (data_len == 0) {
11103d2e1b82SMatthieu Baerts (NGI0) pr_debug("infinite mapping received\n");
1111fc518953SFlorian Westphal MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
1112648ef4b8SMat Martineau return MAPPING_INVALID;
1113648ef4b8SMat Martineau }
1114648ef4b8SMat Martineau
1115648ef4b8SMat Martineau if (mpext->data_fin == 1) {
1116648ef4b8SMat Martineau if (data_len == 1) {
11171a49b2c2SMat Martineau bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
11181a49b2c2SMat Martineau mpext->dsn64);
11193d2e1b82SMatthieu Baerts (NGI0) pr_debug("DATA_FIN with no payload seq=%llu\n", mpext->data_seq);
1120648ef4b8SMat Martineau if (subflow->map_valid) {
1121648ef4b8SMat Martineau /* A DATA_FIN might arrive in a DSS
1122648ef4b8SMat Martineau * option before the previous mapping
1123648ef4b8SMat Martineau * has been fully consumed. Continue
1124648ef4b8SMat Martineau * handling the existing mapping.
1125648ef4b8SMat Martineau */
1126648ef4b8SMat Martineau skb_ext_del(skb, SKB_EXT_MPTCP);
1127648ef4b8SMat Martineau return MAPPING_OK;
1128648ef4b8SMat Martineau } else {
1129a5cb752bSPaolo Abeni if (updated)
1130a5cb752bSPaolo Abeni mptcp_schedule_work((struct sock *)msk);
1131ef59b195SMat Martineau
1132648ef4b8SMat Martineau return MAPPING_DATA_FIN;
1133648ef4b8SMat Martineau }
113443b54c6eSMat Martineau } else {
1135017512a0SPaolo Abeni u64 data_fin_seq = mpext->data_seq + data_len - 1;
11361a49b2c2SMat Martineau
11371a49b2c2SMat Martineau /* If mpext->data_seq is a 32-bit value, data_fin_seq
11381a49b2c2SMat Martineau * must also be limited to 32 bits.
11391a49b2c2SMat Martineau */
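			/* e.g. a 32-bit data_seq of 0xfffffffe with data_len 4
			 * yields 0x100000001 above; the mask wraps it back to
			 * the expected 0x00000001
			 */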
11401a49b2c2SMat Martineau if (!mpext->dsn64)
11411a49b2c2SMat Martineau data_fin_seq &= GENMASK_ULL(31, 0);
11421a49b2c2SMat Martineau
11431a49b2c2SMat Martineau mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
11443d2e1b82SMatthieu Baerts (NGI0) pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d\n",
11451a49b2c2SMat Martineau data_fin_seq, mpext->dsn64);
1146648ef4b8SMat Martineau }
1147648ef4b8SMat Martineau
1148648ef4b8SMat Martineau /* Adjust for DATA_FIN using 1 byte of sequence space */
1149648ef4b8SMat Martineau data_len--;
1150648ef4b8SMat Martineau }
1151648ef4b8SMat Martineau
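	/* expand the (possibly 32-bit) DSS sequence number to a full 64-bit
	 * value, using the current msk-level ack_seq as reference point
	 */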
11525957a890SPaolo Abeni map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64);
115337198e93SDavide Caratti WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
1154648ef4b8SMat Martineau
1155648ef4b8SMat Martineau if (subflow->map_valid) {
1156648ef4b8SMat Martineau /* Allow replacing only with an identical map */
1157648ef4b8SMat Martineau if (subflow->map_seq == map_seq &&
1158648ef4b8SMat Martineau subflow->map_subflow_seq == mpext->subflow_seq &&
1159dd8bcd17SPaolo Abeni subflow->map_data_len == data_len &&
1160dd8bcd17SPaolo Abeni subflow->map_csum_reqd == mpext->csum_reqd) {
1161648ef4b8SMat Martineau skb_ext_del(skb, SKB_EXT_MPTCP);
1162dd8bcd17SPaolo Abeni goto validate_csum;
1163648ef4b8SMat Martineau }
1164648ef4b8SMat Martineau
1165648ef4b8SMat Martineau /* If this skb's data is fully covered by the current mapping,
1166648ef4b8SMat Martineau * the new map would need caching, which is not supported
1167648ef4b8SMat Martineau */
1168fc518953SFlorian Westphal if (skb_is_fully_mapped(ssk, skb)) {
1169fc518953SFlorian Westphal MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
1170648ef4b8SMat Martineau return MAPPING_INVALID;
1171fc518953SFlorian Westphal }
1172648ef4b8SMat Martineau
1173648ef4b8SMat Martineau /* will validate the next map after consuming the current one */
1174dd8bcd17SPaolo Abeni goto validate_csum;
1175648ef4b8SMat Martineau }
1176648ef4b8SMat Martineau
1177648ef4b8SMat Martineau subflow->map_seq = map_seq;
1178648ef4b8SMat Martineau subflow->map_subflow_seq = mpext->subflow_seq;
1179648ef4b8SMat Martineau subflow->map_data_len = data_len;
1180648ef4b8SMat Martineau subflow->map_valid = 1;
1181dd8bcd17SPaolo Abeni subflow->map_data_fin = mpext->data_fin;
1182d22f4988SChristoph Paasch subflow->mpc_map = mpext->mpc_map;
1183dd8bcd17SPaolo Abeni subflow->map_csum_reqd = mpext->csum_reqd;
1184dd8bcd17SPaolo Abeni subflow->map_csum_len = 0;
1185dd8bcd17SPaolo Abeni subflow->map_data_csum = csum_unfold(mpext->csum);
1186dd8bcd17SPaolo Abeni
1187dd8bcd17SPaolo Abeni /* Cf. RFC 8684 Section 3.3 */
1188dd8bcd17SPaolo Abeni if (unlikely(subflow->map_csum_reqd != csum_reqd))
1189dd8bcd17SPaolo Abeni return MAPPING_INVALID;
1190dd8bcd17SPaolo Abeni
11913d2e1b82SMatthieu Baerts (NGI0) pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n",
1192648ef4b8SMat Martineau subflow->map_seq, subflow->map_subflow_seq,
1193dd8bcd17SPaolo Abeni subflow->map_data_len, subflow->map_csum_reqd,
1194dd8bcd17SPaolo Abeni subflow->map_data_csum);
1195648ef4b8SMat Martineau
1196648ef4b8SMat Martineau validate_seq:
1197648ef4b8SMat Martineau /* we revalidate the valid mapping on each new skb, because we must
1198648ef4b8SMat Martineau * ensure the current skb is completely covered by the available mapping
1199648ef4b8SMat Martineau */
120006285da9SPaolo Abeni if (!validate_mapping(ssk, skb)) {
120106285da9SPaolo Abeni MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
1202648ef4b8SMat Martineau return MAPPING_INVALID;
120306285da9SPaolo Abeni }
1204648ef4b8SMat Martineau
1205648ef4b8SMat Martineau skb_ext_del(skb, SKB_EXT_MPTCP);
1206dd8bcd17SPaolo Abeni
1207dd8bcd17SPaolo Abeni validate_csum:
1208dd8bcd17SPaolo Abeni return validate_data_csum(ssk, skb, csum_reqd);
1209648ef4b8SMat Martineau }
1210648ef4b8SMat Martineau
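/* drop up to @limit bytes of already acked data from the head of the subflow
 * receive queue, advancing copied_seq and invalidating the current mapping
 * once it is fully consumed
 */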
mptcp_subflow_discard_data(struct sock * ssk,struct sk_buff * skb,u64 limit)121104e4cd4fSPaolo Abeni static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
12121d39cd8cSPaolo Abeni u64 limit)
12136719331cSPaolo Abeni {
12146719331cSPaolo Abeni struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
121504e4cd4fSPaolo Abeni bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
1216bf39cfc0SPaolo Abeni struct tcp_sock *tp = tcp_sk(ssk);
1217bf39cfc0SPaolo Abeni u32 offset, incr, avail_len;
12186719331cSPaolo Abeni
1219bf39cfc0SPaolo Abeni offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
1220bf39cfc0SPaolo Abeni if (WARN_ON_ONCE(offset > skb->len))
1221bf39cfc0SPaolo Abeni goto out;
12226719331cSPaolo Abeni
1223bf39cfc0SPaolo Abeni avail_len = skb->len - offset;
1224bf39cfc0SPaolo Abeni incr = limit >= avail_len ? avail_len + fin : limit;
1225bf39cfc0SPaolo Abeni
12263d2e1b82SMatthieu Baerts (NGI0) pr_debug("discarding=%d len=%d offset=%d seq=%d\n", incr, skb->len,
1227bf39cfc0SPaolo Abeni offset, subflow->map_subflow_seq);
122806242e44SPaolo Abeni MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
122904e4cd4fSPaolo Abeni tcp_sk(ssk)->copied_seq += incr;
1230bf39cfc0SPaolo Abeni
1231bf39cfc0SPaolo Abeni out:
123204e4cd4fSPaolo Abeni if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
123304e4cd4fSPaolo Abeni sk_eat_skb(ssk, skb);
123404e4cd4fSPaolo Abeni if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
12356719331cSPaolo Abeni subflow->map_valid = 0;
12366719331cSPaolo Abeni }
12376719331cSPaolo Abeni
123840947e13SFlorian Westphal /* sched mptcp worker to remove the subflow if no more data is pending */
subflow_sched_work_if_closed(struct mptcp_sock * msk,struct sock * ssk)123940947e13SFlorian Westphal static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
124040947e13SFlorian Westphal {
1241255bc4fcSMatthieu Baerts (NGI0) struct sock *sk = (struct sock *)msk;
1242255bc4fcSMatthieu Baerts (NGI0)
1243255bc4fcSMatthieu Baerts (NGI0) if (likely(ssk->sk_state != TCP_CLOSE &&
1244255bc4fcSMatthieu Baerts (NGI0) (ssk->sk_state != TCP_CLOSE_WAIT ||
1245255bc4fcSMatthieu Baerts (NGI0) inet_sk_state_load(sk) != TCP_ESTABLISHED)))
124640947e13SFlorian Westphal return;
124740947e13SFlorian Westphal
124840947e13SFlorian Westphal if (skb_queue_empty(&ssk->sk_receive_queue) &&
1249a5cb752bSPaolo Abeni !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
1250255bc4fcSMatthieu Baerts (NGI0) mptcp_schedule_work(sk);
125140947e13SFlorian Westphal }
125240947e13SFlorian Westphal
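/* arm the MP_FAIL timeout on the MPC subflow and ack the peer, so that the
 * MP_FAIL option set by the caller gets transmitted
 */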
mptcp_subflow_fail(struct mptcp_sock * msk,struct sock * ssk)125376a13b31SGeliang Tang static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
125476a13b31SGeliang Tang {
125576a13b31SGeliang Tang struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
125676a13b31SGeliang Tang unsigned long fail_tout;
125776a13b31SGeliang Tang
125876a13b31SGeliang Tang /* graceful failure can happen only on the MPC subflow */
125976a13b31SGeliang Tang if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
126076a13b31SGeliang Tang return;
126176a13b31SGeliang Tang
126276a13b31SGeliang Tang /* since the close timeout takes precedence over the fail one,
126376a13b31SGeliang Tang * no need to start the latter when the former is already set
126476a13b31SGeliang Tang */
126576a13b31SGeliang Tang if (sock_flag((struct sock *)msk, SOCK_DEAD))
126676a13b31SGeliang Tang return;
126776a13b31SGeliang Tang
126876a13b31SGeliang Tang /* we don't need extreme accuracy here: a zero fail_tout is a special
126976a13b31SGeliang Tang * value meaning no fail timeout at all, so avoid it if the sum wraps;
127076a13b31SGeliang Tang */
127176a13b31SGeliang Tang fail_tout = jiffies + TCP_RTO_MAX;
127276a13b31SGeliang Tang if (!fail_tout)
127376a13b31SGeliang Tang fail_tout = 1;
127476a13b31SGeliang Tang WRITE_ONCE(subflow->fail_tout, fail_tout);
127576a13b31SGeliang Tang tcp_send_ack(ssk);
127676a13b31SGeliang Tang
1277f6909dc1SPaolo Abeni mptcp_reset_tout_timer(msk, subflow->fail_tout);
127876a13b31SGeliang Tang }
127976a13b31SGeliang Tang
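/* core of the subflow receive path: validate the DSS mapping covering the
 * data sitting in the subflow receive queue, discard data already acked at
 * the MPTCP level and, on unrecoverable mapping errors, either fall back to
 * plain TCP or reset the subflow
 */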
subflow_check_data_avail(struct sock * ssk)1280648ef4b8SMat Martineau static bool subflow_check_data_avail(struct sock *ssk)
1281648ef4b8SMat Martineau {
1282648ef4b8SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1283648ef4b8SMat Martineau enum mapping_status status;
1284648ef4b8SMat Martineau struct mptcp_sock *msk;
1285648ef4b8SMat Martineau struct sk_buff *skb;
1286648ef4b8SMat Martineau
128747bebdf3SPaolo Abeni if (!skb_peek(&ssk->sk_receive_queue))
1288ea56dcb4SGeliang Tang WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1289648ef4b8SMat Martineau if (subflow->data_avail)
1290648ef4b8SMat Martineau return true;
1291648ef4b8SMat Martineau
1292648ef4b8SMat Martineau msk = mptcp_sk(subflow->conn);
1293648ef4b8SMat Martineau for (;;) {
1294648ef4b8SMat Martineau u64 ack_seq;
1295648ef4b8SMat Martineau u64 old_ack;
1296648ef4b8SMat Martineau
129743b54c6eSMat Martineau status = get_mapping_status(ssk, msk);
1298d96a838aSGeliang Tang trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
129931bf11deSPaolo Abeni if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
130031bf11deSPaolo Abeni status == MAPPING_BAD_CSUM))
1301dea2b1eaSPaolo Abeni goto fallback;
1302648ef4b8SMat Martineau
1303648ef4b8SMat Martineau if (status != MAPPING_OK)
130440947e13SFlorian Westphal goto no_data;
1305648ef4b8SMat Martineau
1306648ef4b8SMat Martineau skb = skb_peek(&ssk->sk_receive_queue);
1307648ef4b8SMat Martineau if (WARN_ON_ONCE(!skb))
130840947e13SFlorian Westphal goto no_data;
1309648ef4b8SMat Martineau
1310b3ea6b27SPaolo Abeni if (unlikely(!READ_ONCE(msk->can_ack)))
1311dea2b1eaSPaolo Abeni goto fallback;
1312d22f4988SChristoph Paasch
1313648ef4b8SMat Martineau old_ack = READ_ONCE(msk->ack_seq);
1314648ef4b8SMat Martineau ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
13153d2e1b82SMatthieu Baerts (NGI0) pr_debug("msk ack_seq=%llx subflow ack_seq=%llx\n", old_ack,
1316648ef4b8SMat Martineau ack_seq);
131799d1055cSPaolo Abeni if (unlikely(before64(ack_seq, old_ack))) {
131899d1055cSPaolo Abeni mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
131999d1055cSPaolo Abeni continue;
132047bebdf3SPaolo Abeni }
1321648ef4b8SMat Martineau
132299d1055cSPaolo Abeni WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
132399d1055cSPaolo Abeni break;
1324648ef4b8SMat Martineau }
1325648ef4b8SMat Martineau return true;
1326648ef4b8SMat Martineau
132740947e13SFlorian Westphal no_data:
132840947e13SFlorian Westphal subflow_sched_work_if_closed(msk, ssk);
132940947e13SFlorian Westphal return false;
1330dea2b1eaSPaolo Abeni
1331dea2b1eaSPaolo Abeni fallback:
13320348c690SGeliang Tang if (!__mptcp_check_fallback(msk)) {
1333dea2b1eaSPaolo Abeni /* RFC 8684 section 3.7. */
133431bf11deSPaolo Abeni if (status == MAPPING_BAD_CSUM &&
133531bf11deSPaolo Abeni (subflow->mp_join || subflow->valid_csum_seen)) {
133631bf11deSPaolo Abeni subflow->send_mp_fail = 1;
133731bf11deSPaolo Abeni
13387b16871fSPaolo Abeni if (!READ_ONCE(msk->allow_infinite_fallback)) {
1339478d7700SGeliang Tang subflow->reset_transient = 0;
1340478d7700SGeliang Tang subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
1341f745a3ebSPaolo Abeni goto reset;
13421761fed2SGeliang Tang }
1343f745a3ebSPaolo Abeni mptcp_subflow_fail(msk, ssk);
1344f745a3ebSPaolo Abeni WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
1345478d7700SGeliang Tang return true;
1346478d7700SGeliang Tang }
1347478d7700SGeliang Tang
1348*3d7e7ef8SMatthieu Baerts (NGI0) if (!READ_ONCE(msk->allow_infinite_fallback)) {
1349dea2b1eaSPaolo Abeni /* fatal protocol error, close the socket.
1350dea2b1eaSPaolo Abeni * subflow_error_report() will introduce the appropriate barriers
1351dea2b1eaSPaolo Abeni */
1352dc87efdbSFlorian Westphal subflow->reset_transient = 0;
1353dc87efdbSFlorian Westphal subflow->reset_reason = MPTCP_RST_EMPTCP;
1354f745a3ebSPaolo Abeni
1355f745a3ebSPaolo Abeni reset:
13569ae8e5adSEric Dumazet WRITE_ONCE(ssk->sk_err, EBADMSG);
1357f745a3ebSPaolo Abeni tcp_set_state(ssk, TCP_CLOSE);
1358f745a3ebSPaolo Abeni while ((skb = skb_peek(&ssk->sk_receive_queue)))
1359f745a3ebSPaolo Abeni sk_eat_skb(ssk, skb);
1360648ef4b8SMat Martineau tcp_send_active_reset(ssk, GFP_ATOMIC);
1361ea56dcb4SGeliang Tang WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1362648ef4b8SMat Martineau return false;
1363648ef4b8SMat Martineau }
1364648ef4b8SMat Martineau
1365d51991e2SPaolo Abeni mptcp_do_fallback(ssk);
13660348c690SGeliang Tang }
13670348c690SGeliang Tang
1368dea2b1eaSPaolo Abeni skb = skb_peek(&ssk->sk_receive_queue);
1369dea2b1eaSPaolo Abeni subflow->map_valid = 1;
1370dea2b1eaSPaolo Abeni subflow->map_seq = READ_ONCE(msk->ack_seq);
1371dea2b1eaSPaolo Abeni subflow->map_data_len = skb->len;
1372dea2b1eaSPaolo Abeni subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
137399d1055cSPaolo Abeni WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
1374dea2b1eaSPaolo Abeni return true;
1375dea2b1eaSPaolo Abeni }
1376dea2b1eaSPaolo Abeni
mptcp_subflow_data_available(struct sock * sk)1377648ef4b8SMat Martineau bool mptcp_subflow_data_available(struct sock *sk)
1378648ef4b8SMat Martineau {
1379648ef4b8SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1380648ef4b8SMat Martineau
1381648ef4b8SMat Martineau /* check if current mapping is still valid */
1382648ef4b8SMat Martineau if (subflow->map_valid &&
1383648ef4b8SMat Martineau mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
1384648ef4b8SMat Martineau subflow->map_valid = 0;
1385ea56dcb4SGeliang Tang WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1386648ef4b8SMat Martineau
13873d2e1b82SMatthieu Baerts (NGI0) pr_debug("Done with mapping: seq=%u data_len=%u\n",
1388648ef4b8SMat Martineau subflow->map_subflow_seq,
1389648ef4b8SMat Martineau subflow->map_data_len);
1390648ef4b8SMat Martineau }
1391648ef4b8SMat Martineau
139247bebdf3SPaolo Abeni return subflow_check_data_avail(sk);
1393648ef4b8SMat Martineau }
1394648ef4b8SMat Martineau
1395071c8ed6SFlorian Westphal /* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
1396071c8ed6SFlorian Westphal * not the ssk one.
1397071c8ed6SFlorian Westphal *
1398071c8ed6SFlorian Westphal * In mptcp, rwin is about the mptcp-level connection data.
1399071c8ed6SFlorian Westphal *
1400071c8ed6SFlorian Westphal * Data that is still on the ssk rx queue can thus be ignored;
140155320b82SBhaskar Chowdhury * as far as the mptcp peer is concerned, that data is still in flight.
1402071c8ed6SFlorian Westphal * DSS ACK is updated when skb is moved to the mptcp rx queue.
1403071c8ed6SFlorian Westphal */
mptcp_space(const struct sock * ssk,int * space,int * full_space)1404071c8ed6SFlorian Westphal void mptcp_space(const struct sock *ssk, int *space, int *full_space)
1405071c8ed6SFlorian Westphal {
1406071c8ed6SFlorian Westphal const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1407071c8ed6SFlorian Westphal const struct sock *sk = subflow->conn;
1408071c8ed6SFlorian Westphal
1409ea4ca586SPaolo Abeni *space = __mptcp_space(sk);
1410b8dc6d6cSPaolo Abeni *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
1411071c8ed6SFlorian Westphal }
1412071c8ed6SFlorian Westphal
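/* propagate a subflow error to the msk, either immediately or via a deferred
 * callback when the msk socket is owned by user context
 */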
subflow_error_report(struct sock * ssk)141315cc1045SPaolo Abeni static void subflow_error_report(struct sock *ssk)
141415cc1045SPaolo Abeni {
141515cc1045SPaolo Abeni struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
141615cc1045SPaolo Abeni
1417b7a679baSPaolo Abeni /* bail early if this is a no-op, so that we avoid introducing a
1418b7a679baSPaolo Abeni * problematic lockdep dependency between TCP accept queue lock
1419b7a679baSPaolo Abeni * and msk socket spinlock
1420b7a679baSPaolo Abeni */
1421b7a679baSPaolo Abeni if (!sk->sk_socket)
1422b7a679baSPaolo Abeni return;
1423b7a679baSPaolo Abeni
142415cc1045SPaolo Abeni mptcp_data_lock(sk);
142515cc1045SPaolo Abeni if (!sock_owned_by_user(sk))
142615cc1045SPaolo Abeni __mptcp_error_report(sk);
142715cc1045SPaolo Abeni else
1428e9d09bacSPaolo Abeni __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
142915cc1045SPaolo Abeni mptcp_data_unlock(sk);
143015cc1045SPaolo Abeni }
143115cc1045SPaolo Abeni
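/* sk_data_ready() override: wake the msk on incoming data - or on a new
 * connection reaching a listener - and request an immediate ack when the
 * msk-level receive low watermark has not been met yet
 */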
subflow_data_ready(struct sock * sk)1432499ada50SPaolo Abeni static void subflow_data_ready(struct sock *sk)
1433499ada50SPaolo Abeni {
1434499ada50SPaolo Abeni struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1435499ada50SPaolo Abeni u16 state = 1 << inet_sk_state_load(sk);
1436499ada50SPaolo Abeni struct sock *parent = subflow->conn;
1437499ada50SPaolo Abeni struct mptcp_sock *msk;
1438499ada50SPaolo Abeni
143940e0b090SPeilin Ye trace_sk_data_ready(sk);
144040e0b090SPeilin Ye
1441499ada50SPaolo Abeni msk = mptcp_sk(parent);
1442499ada50SPaolo Abeni if (state & TCPF_LISTEN) {
1443499ada50SPaolo Abeni /* MPJ subflows are removed from the accept queue before reaching here,
1444499ada50SPaolo Abeni * avoid stray wakeups
1445499ada50SPaolo Abeni */
1446499ada50SPaolo Abeni if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
1447499ada50SPaolo Abeni return;
1448499ada50SPaolo Abeni
1449499ada50SPaolo Abeni parent->sk_data_ready(parent);
1450499ada50SPaolo Abeni return;
1451499ada50SPaolo Abeni }
1452499ada50SPaolo Abeni
1453499ada50SPaolo Abeni WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
1454499ada50SPaolo Abeni !subflow->mp_join && !(state & TCPF_CLOSE));
1455499ada50SPaolo Abeni
1456124a4885SPaolo Abeni if (mptcp_subflow_data_available(sk)) {
1457499ada50SPaolo Abeni mptcp_data_ready(parent, sk);
1458124a4885SPaolo Abeni
1459124a4885SPaolo Abeni /* subflow-level lowat tests are not relevant;
1460124a4885SPaolo Abeni * respect the msk-level threshold, eventually mandating an immediate ack
1461124a4885SPaolo Abeni */
1462124a4885SPaolo Abeni if (mptcp_data_avail(msk) < parent->sk_rcvlowat &&
1463124a4885SPaolo Abeni (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss)
1464124a4885SPaolo Abeni inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
1465124a4885SPaolo Abeni } else if (unlikely(sk->sk_err)) {
1466499ada50SPaolo Abeni subflow_error_report(sk);
1467499ada50SPaolo Abeni }
1468124a4885SPaolo Abeni }
1469499ada50SPaolo Abeni
subflow_write_space(struct sock * ssk)1470499ada50SPaolo Abeni static void subflow_write_space(struct sock *ssk)
1471499ada50SPaolo Abeni {
1472499ada50SPaolo Abeni struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
1473499ada50SPaolo Abeni
1474499ada50SPaolo Abeni mptcp_propagate_sndbuf(sk, ssk);
1475499ada50SPaolo Abeni mptcp_write_space(sk);
1476499ada50SPaolo Abeni }
1477499ada50SPaolo Abeni
147851fa7f8eSFlorian Westphal static const struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock * sk)1479cec37a6eSPeter Krystad subflow_default_af_ops(struct sock *sk)
1480cec37a6eSPeter Krystad {
1481cec37a6eSPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1482cec37a6eSPeter Krystad if (sk->sk_family == AF_INET6)
1483cec37a6eSPeter Krystad return &subflow_v6_specific;
1484cec37a6eSPeter Krystad #endif
1485cec37a6eSPeter Krystad return &subflow_specific;
1486cec37a6eSPeter Krystad }
1487cec37a6eSPeter Krystad
1488cec37a6eSPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
mptcpv6_handle_mapped(struct sock * sk,bool mapped)148931484d56SGeert Uytterhoeven void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
149031484d56SGeert Uytterhoeven {
1491cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1492cec37a6eSPeter Krystad struct inet_connection_sock *icsk = inet_csk(sk);
149351fa7f8eSFlorian Westphal const struct inet_connection_sock_af_ops *target;
1494cec37a6eSPeter Krystad
1495cec37a6eSPeter Krystad target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
1496cec37a6eSPeter Krystad
14973d2e1b82SMatthieu Baerts (NGI0) pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n",
1498cec37a6eSPeter Krystad subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);
1499cec37a6eSPeter Krystad
1500cec37a6eSPeter Krystad if (likely(icsk->icsk_af_ops == target))
1501cec37a6eSPeter Krystad return;
1502cec37a6eSPeter Krystad
1503cec37a6eSPeter Krystad subflow->icsk_af_ops = icsk->icsk_af_ops;
1504cec37a6eSPeter Krystad icsk->icsk_af_ops = target;
1505cec37a6eSPeter Krystad }
150631484d56SGeert Uytterhoeven #endif
1507cec37a6eSPeter Krystad
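/* convert an mptcp_addr_info into a sockaddr of the given family, performing
 * the v4 <-> v4-mapped-v6 translation when the families differ
 */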
mptcp_info2sockaddr(const struct mptcp_addr_info * info,struct sockaddr_storage * addr,unsigned short family)15081729cf18SGeliang Tang void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
150950a13bc3SMatthieu Baerts struct sockaddr_storage *addr,
151050a13bc3SMatthieu Baerts unsigned short family)
1511ec3edaa7SPeter Krystad {
1512ec3edaa7SPeter Krystad memset(addr, 0, sizeof(*addr));
151350a13bc3SMatthieu Baerts addr->ss_family = family;
1514ec3edaa7SPeter Krystad if (addr->ss_family == AF_INET) {
1515ec3edaa7SPeter Krystad struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
1516ec3edaa7SPeter Krystad
151750a13bc3SMatthieu Baerts if (info->family == AF_INET)
1518ec3edaa7SPeter Krystad in_addr->sin_addr = info->addr;
151950a13bc3SMatthieu Baerts #if IS_ENABLED(CONFIG_MPTCP_IPV6)
152050a13bc3SMatthieu Baerts else if (ipv6_addr_v4mapped(&info->addr6))
152150a13bc3SMatthieu Baerts in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3];
152250a13bc3SMatthieu Baerts #endif
1523ec3edaa7SPeter Krystad in_addr->sin_port = info->port;
1524ec3edaa7SPeter Krystad }
1525ec3edaa7SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1526ec3edaa7SPeter Krystad else if (addr->ss_family == AF_INET6) {
1527ec3edaa7SPeter Krystad struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
1528ec3edaa7SPeter Krystad
152950a13bc3SMatthieu Baerts if (info->family == AF_INET)
153050a13bc3SMatthieu Baerts ipv6_addr_set_v4mapped(info->addr.s_addr,
153150a13bc3SMatthieu Baerts &in6_addr->sin6_addr);
153250a13bc3SMatthieu Baerts else
1533ec3edaa7SPeter Krystad in6_addr->sin6_addr = info->addr6;
1534ec3edaa7SPeter Krystad in6_addr->sin6_port = info->port;
1535ec3edaa7SPeter Krystad }
1536ec3edaa7SPeter Krystad #endif
1537ec3edaa7SPeter Krystad }
1538ec3edaa7SPeter Krystad
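/* create and connect a new subflow towards the given remote address, bind it
 * to the given local address and link it into the msk conn_list; creation
 * failures are accounted via mptcp_pm_close_subflow()
 */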
__mptcp_subflow_connect(struct sock * sk,const struct mptcp_addr_info * loc,const struct mptcp_addr_info * remote)1539ef0da3b8SPaolo Abeni int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
1540ee285257SGeliang Tang const struct mptcp_addr_info *remote)
1541ec3edaa7SPeter Krystad {
1542ec3edaa7SPeter Krystad struct mptcp_sock *msk = mptcp_sk(sk);
1543ec3edaa7SPeter Krystad struct mptcp_subflow_context *subflow;
1544ec3edaa7SPeter Krystad struct sockaddr_storage addr;
15452ff0e566SGeliang Tang int remote_id = remote->id;
15466bad912bSPaolo Abeni int local_id = loc->id;
154795d68651SPaolo Abeni int err = -ENOTCONN;
1548ec3edaa7SPeter Krystad struct socket *sf;
15496bad912bSPaolo Abeni struct sock *ssk;
1550ec3edaa7SPeter Krystad u32 remote_token;
1551ec3edaa7SPeter Krystad int addrlen;
1552ee285257SGeliang Tang int ifindex;
1553ee285257SGeliang Tang u8 flags;
1554ec3edaa7SPeter Krystad
1555b93df08cSPaolo Abeni if (!mptcp_is_fully_established(sk))
155695d68651SPaolo Abeni goto err_out;
1557ec3edaa7SPeter Krystad
15586bc1fe7dSPaolo Abeni err = mptcp_subflow_create_socket(sk, loc->family, &sf);
1559ec3edaa7SPeter Krystad if (err)
156095d68651SPaolo Abeni goto err_out;
1561ec3edaa7SPeter Krystad
15626bad912bSPaolo Abeni ssk = sf->sk;
15636bad912bSPaolo Abeni subflow = mptcp_subflow_ctx(ssk);
15646bad912bSPaolo Abeni do {
15656bad912bSPaolo Abeni get_random_bytes(&subflow->local_nonce, sizeof(u32));
15666bad912bSPaolo Abeni } while (!subflow->local_nonce);
15676bad912bSPaolo Abeni
15684cf86ae8SPaolo Abeni if (local_id)
15694cf86ae8SPaolo Abeni subflow_set_local_id(subflow, local_id);
15706bad912bSPaolo Abeni
15718b201370SKishen Maloor mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
1572ee285257SGeliang Tang &flags, &ifindex);
1573b3ea6b27SPaolo Abeni subflow->remote_key_valid = 1;
1574ec3edaa7SPeter Krystad subflow->remote_key = msk->remote_key;
1575ec3edaa7SPeter Krystad subflow->local_key = msk->local_key;
1576ec3edaa7SPeter Krystad subflow->token = msk->token;
157750a13bc3SMatthieu Baerts mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
1578ec3edaa7SPeter Krystad
1579ec3edaa7SPeter Krystad addrlen = sizeof(struct sockaddr_in);
1580ec3edaa7SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
158150a13bc3SMatthieu Baerts if (addr.ss_family == AF_INET6)
1582ec3edaa7SPeter Krystad addrlen = sizeof(struct sockaddr_in6);
1583ec3edaa7SPeter Krystad #endif
1584c9406a23SFlorian Westphal mptcp_sockopt_sync(msk, ssk);
1585c9406a23SFlorian Westphal
1586daa83ab0SGeliang Tang ssk->sk_bound_dev_if = ifindex;
1587ec3edaa7SPeter Krystad err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
1588ec3edaa7SPeter Krystad if (err)
1589ec3edaa7SPeter Krystad goto failed;
1590ec3edaa7SPeter Krystad
1591ec3edaa7SPeter Krystad mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
15923d2e1b82SMatthieu Baerts (NGI0) pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk,
15932ff0e566SGeliang Tang remote_token, local_id, remote_id);
1594ec3edaa7SPeter Krystad subflow->remote_token = remote_token;
15952dba5774SPaolo Abeni WRITE_ONCE(subflow->remote_id, remote_id);
1596ec3edaa7SPeter Krystad subflow->request_join = 1;
1597daa83ab0SGeliang Tang subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
15986f06b4d4SPaolo Abeni subflow->subflow_id = msk->subflow_id++;
159950a13bc3SMatthieu Baerts mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
1600ec3edaa7SPeter Krystad
16013e501490SPaolo Abeni sock_hold(ssk);
16023e501490SPaolo Abeni list_add_tail(&subflow->node, &msk->conn_list);
1603ec3edaa7SPeter Krystad err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
1604ec3edaa7SPeter Krystad if (err && err != -EINPROGRESS)
16055b950ff4SPaolo Abeni goto failed_unlink;
1606ec3edaa7SPeter Krystad
1607866f26f2SPaolo Abeni /* discard the subflow socket */
1608866f26f2SPaolo Abeni mptcp_sock_graft(ssk, sk->sk_socket);
1609866f26f2SPaolo Abeni iput(SOCK_INODE(sf));
16100530020aSGeliang Tang WRITE_ONCE(msk->allow_infinite_fallback, false);
161127e5ccc2SPaolo Abeni mptcp_stop_tout_timer(sk);
1612b5177ed9SMat Martineau return 0;
1613ec3edaa7SPeter Krystad
16145b950ff4SPaolo Abeni failed_unlink:
16155b950ff4SPaolo Abeni list_del(&subflow->node);
1616f0715779SFlorian Westphal sock_put(mptcp_subflow_tcp_sock(subflow));
1617ec3edaa7SPeter Krystad
1618ec3edaa7SPeter Krystad failed:
1619e16163b6SPaolo Abeni subflow->disposable = 1;
1620ec3edaa7SPeter Krystad sock_release(sf);
162195d68651SPaolo Abeni
162295d68651SPaolo Abeni err_out:
162395d68651SPaolo Abeni /* we account subflows before the creation, and these failures will not
162495d68651SPaolo Abeni * be caught by sk_state_change()
162595d68651SPaolo Abeni */
162695d68651SPaolo Abeni mptcp_pm_close_subflow(msk);
1627ec3edaa7SPeter Krystad return err;
1628ec3edaa7SPeter Krystad }
1629ec3edaa7SPeter Krystad
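/* additional subflows are created from kernel workers: move the new socket
 * into the same cgroup (and memcg) as the owning msk socket
 */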
mptcp_attach_cgroup(struct sock * parent,struct sock * child)16303764b0c5SNicolas Rybowski static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
16313764b0c5SNicolas Rybowski {
16323764b0c5SNicolas Rybowski #ifdef CONFIG_SOCK_CGROUP_DATA
16333764b0c5SNicolas Rybowski struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data,
16343764b0c5SNicolas Rybowski *child_skcd = &child->sk_cgrp_data;
16353764b0c5SNicolas Rybowski
16363764b0c5SNicolas Rybowski /* only the additional subflows created by kworkers have to be modified */
16373764b0c5SNicolas Rybowski if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
16383764b0c5SNicolas Rybowski cgroup_id(sock_cgroup_ptr(child_skcd))) {
16393764b0c5SNicolas Rybowski #ifdef CONFIG_MEMCG
16403764b0c5SNicolas Rybowski struct mem_cgroup *memcg = parent->sk_memcg;
16413764b0c5SNicolas Rybowski
16423764b0c5SNicolas Rybowski mem_cgroup_sk_free(child);
16433764b0c5SNicolas Rybowski if (memcg && css_tryget(&memcg->css))
16443764b0c5SNicolas Rybowski child->sk_memcg = memcg;
16453764b0c5SNicolas Rybowski #endif /* CONFIG_MEMCG */
16463764b0c5SNicolas Rybowski
16473764b0c5SNicolas Rybowski cgroup_sk_free(child_skcd);
16483764b0c5SNicolas Rybowski *child_skcd = *parent_skcd;
16493764b0c5SNicolas Rybowski cgroup_sk_clone(child_skcd);
16503764b0c5SNicolas Rybowski }
16513764b0c5SNicolas Rybowski #endif /* CONFIG_SOCK_CGROUP_DATA */
16523764b0c5SNicolas Rybowski }
16533764b0c5SNicolas Rybowski
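/* swap in the mptcp-specific sk_prot variant, so that release_cb and
 * diag_destroy can be intercepted at the TCP level
 */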
mptcp_subflow_ops_override(struct sock * ssk)1654b19bc294SPaolo Abeni static void mptcp_subflow_ops_override(struct sock *ssk)
1655b19bc294SPaolo Abeni {
1656b19bc294SPaolo Abeni #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1657b19bc294SPaolo Abeni if (ssk->sk_prot == &tcpv6_prot)
1658b19bc294SPaolo Abeni ssk->sk_prot = &tcpv6_prot_override;
1659b19bc294SPaolo Abeni else
1660b19bc294SPaolo Abeni #endif
1661b19bc294SPaolo Abeni ssk->sk_prot = &tcp_prot_override;
1662b19bc294SPaolo Abeni }
1663b19bc294SPaolo Abeni
mptcp_subflow_ops_undo_override(struct sock * ssk)1664b19bc294SPaolo Abeni static void mptcp_subflow_ops_undo_override(struct sock *ssk)
1665b19bc294SPaolo Abeni {
1666b19bc294SPaolo Abeni #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1667b19bc294SPaolo Abeni if (ssk->sk_prot == &tcpv6_prot_override)
1668b19bc294SPaolo Abeni ssk->sk_prot = &tcpv6_prot;
1669b19bc294SPaolo Abeni else
1670b19bc294SPaolo Abeni #endif
1671b19bc294SPaolo Abeni ssk->sk_prot = &tcp_prot;
1672b19bc294SPaolo Abeni }
16736bc1fe7dSPaolo Abeni
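/* create a kernel TCP socket of the given family, attach the mptcp ULP and
 * tie it to the owning msk, adjusting cgroup, net refcount and inode
 * ownership accordingly
 */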
mptcp_subflow_create_socket(struct sock * sk,unsigned short family,struct socket ** new_sock)16746bc1fe7dSPaolo Abeni int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
16756bc1fe7dSPaolo Abeni struct socket **new_sock)
16762303f994SPeter Krystad {
16772303f994SPeter Krystad struct mptcp_subflow_context *subflow;
16782303f994SPeter Krystad struct net *net = sock_net(sk);
16792303f994SPeter Krystad struct socket *sf;
16802303f994SPeter Krystad int err;
16812303f994SPeter Krystad
1682adf73410SPaolo Abeni /* un-accepted server sockets can reach here on bad configuration;
1683adf73410SPaolo Abeni * bail early to avoid greater trouble later
1684adf73410SPaolo Abeni */
1685adf73410SPaolo Abeni if (unlikely(!sk->sk_socket))
1686adf73410SPaolo Abeni return -EINVAL;
1687adf73410SPaolo Abeni
16886bc1fe7dSPaolo Abeni err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
16892303f994SPeter Krystad if (err)
16902303f994SPeter Krystad return err;
16912303f994SPeter Krystad
1692ad217100SPaolo Abeni lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
16932303f994SPeter Krystad
1694e3d9387fSPaolo Abeni err = security_mptcp_add_subflow(sk, sf->sk);
1695e3d9387fSPaolo Abeni if (err)
1696e3d9387fSPaolo Abeni goto release_ssk;
1697e3d9387fSPaolo Abeni
16983764b0c5SNicolas Rybowski /* the newly created socket has to be in the same cgroup as its parent */
16993764b0c5SNicolas Rybowski mptcp_attach_cgroup(sk, sf->sk);
17003764b0c5SNicolas Rybowski
17012303f994SPeter Krystad /* kernel sockets do not by default acquire net ref, but TCP timer
17022303f994SPeter Krystad * needs it.
1703d1e96cc4SEric Dumazet * Update ns_tracker to current stack trace and refcounted tracker.
17042303f994SPeter Krystad */
1705d1e96cc4SEric Dumazet __netns_tracker_free(net, &sf->sk->ns_tracker, false);
17062303f994SPeter Krystad sf->sk->sk_net_refcnt = 1;
17071d2f3d3cSEric Dumazet get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
1708d477eb90SEric Dumazet sock_inuse_add(net, 1);
17092303f994SPeter Krystad err = tcp_set_ulp(sf->sk, "mptcp");
1710e3d9387fSPaolo Abeni
1711e3d9387fSPaolo Abeni release_ssk:
17122303f994SPeter Krystad release_sock(sf->sk);
17132303f994SPeter Krystad
1714b8ad540dSWei Yongjun if (err) {
1715b8ad540dSWei Yongjun sock_release(sf);
17162303f994SPeter Krystad return err;
1717b8ad540dSWei Yongjun }
17182303f994SPeter Krystad
17197d14b0d2SPaolo Abeni /* the newly created socket really belongs to the owning MPTCP master
17207d14b0d2SPaolo Abeni * socket, even if for additional subflows the allocation is performed
17217d14b0d2SPaolo Abeni * by a kernel workqueue. Adjust inode references, so that the
1722d640516aSMenglong Dong * procfs/diag interfaces really show this one belonging to the correct
17237d14b0d2SPaolo Abeni * user.
17247d14b0d2SPaolo Abeni */
17257d14b0d2SPaolo Abeni SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
17267d14b0d2SPaolo Abeni SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
17277d14b0d2SPaolo Abeni SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;
17287d14b0d2SPaolo Abeni
17292303f994SPeter Krystad subflow = mptcp_subflow_ctx(sf->sk);
17303d2e1b82SMatthieu Baerts (NGI0) pr_debug("subflow=%p\n", subflow);
17312303f994SPeter Krystad
17322303f994SPeter Krystad *new_sock = sf;
173379c0949eSPeter Krystad sock_hold(sk);
17342303f994SPeter Krystad subflow->conn = sk;
1735b19bc294SPaolo Abeni mptcp_subflow_ops_override(sf->sk);
17362303f994SPeter Krystad
17372303f994SPeter Krystad return 0;
17382303f994SPeter Krystad }
17392303f994SPeter Krystad
subflow_create_ctx(struct sock * sk,gfp_t priority)17402303f994SPeter Krystad static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
17412303f994SPeter Krystad gfp_t priority)
17422303f994SPeter Krystad {
17432303f994SPeter Krystad struct inet_connection_sock *icsk = inet_csk(sk);
17442303f994SPeter Krystad struct mptcp_subflow_context *ctx;
17452303f994SPeter Krystad
17462303f994SPeter Krystad ctx = kzalloc(sizeof(*ctx), priority);
17472303f994SPeter Krystad if (!ctx)
17482303f994SPeter Krystad return NULL;
17492303f994SPeter Krystad
17502303f994SPeter Krystad rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
1751cec37a6eSPeter Krystad INIT_LIST_HEAD(&ctx->node);
1752b19bc294SPaolo Abeni INIT_LIST_HEAD(&ctx->delegated_node);
17532303f994SPeter Krystad
17543d2e1b82SMatthieu Baerts (NGI0) pr_debug("subflow=%p\n", ctx);
17552303f994SPeter Krystad
17562303f994SPeter Krystad ctx->tcp_sock = sk;
1757ba2cf922SPaolo Abeni WRITE_ONCE(ctx->local_id, -1);
17582303f994SPeter Krystad
17592303f994SPeter Krystad return ctx;
17602303f994SPeter Krystad }
17612303f994SPeter Krystad
__subflow_state_change(struct sock * sk)1762648ef4b8SMat Martineau static void __subflow_state_change(struct sock *sk)
1763648ef4b8SMat Martineau {
1764648ef4b8SMat Martineau struct socket_wq *wq;
1765648ef4b8SMat Martineau
1766648ef4b8SMat Martineau rcu_read_lock();
1767648ef4b8SMat Martineau wq = rcu_dereference(sk->sk_wq);
1768648ef4b8SMat Martineau if (skwq_has_sleeper(wq))
1769648ef4b8SMat Martineau wake_up_interruptible_all(&wq->wait);
1770648ef4b8SMat Martineau rcu_read_unlock();
1771648ef4b8SMat Martineau }
1772648ef4b8SMat Martineau
subflow_is_done(const struct sock * sk)1773648ef4b8SMat Martineau static bool subflow_is_done(const struct sock *sk)
1774648ef4b8SMat Martineau {
1775648ef4b8SMat Martineau return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
1776648ef4b8SMat Martineau }
1777648ef4b8SMat Martineau
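/* sk_state_change() override: detect simultaneous-connect fallback, let the
 * msk see incoming data or errors and schedule the worker when the subflow
 * is closed with no more data pending
 */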
subflow_state_change(struct sock * sk)1778648ef4b8SMat Martineau static void subflow_state_change(struct sock *sk)
1779648ef4b8SMat Martineau {
1780648ef4b8SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1781dc093db5SPaolo Abeni struct sock *parent = subflow->conn;
178281c1d029SPaolo Abeni struct mptcp_sock *msk;
1783648ef4b8SMat Martineau
1784648ef4b8SMat Martineau __subflow_state_change(sk);
1785648ef4b8SMat Martineau
178681c1d029SPaolo Abeni msk = mptcp_sk(parent);
17878fd73804SDavide Caratti if (subflow_simultaneous_connect(sk)) {
17888fd73804SDavide Caratti mptcp_do_fallback(sk);
178981c1d029SPaolo Abeni pr_fallback(msk);
17908fd73804SDavide Caratti subflow->conn_finished = 1;
1791b45df837SPaolo Abeni mptcp_propagate_state(parent, sk, subflow, NULL);
17928fd73804SDavide Caratti }
17938fd73804SDavide Caratti
1794648ef4b8SMat Martineau /* as recvmsg() does not acquire the subflow socket for ssk selection
1795648ef4b8SMat Martineau * a fin packet carrying a DSS can be unnoticed if we don't trigger
1796648ef4b8SMat Martineau * the data available machinery here.
1797648ef4b8SMat Martineau */
1798e1ff9e82SDavide Caratti if (mptcp_subflow_data_available(sk))
17992e52213cSFlorian Westphal mptcp_data_ready(parent, sk);
1800499ada50SPaolo Abeni else if (unlikely(sk->sk_err))
1801499ada50SPaolo Abeni subflow_error_report(sk);
1802648ef4b8SMat Martineau
180340947e13SFlorian Westphal subflow_sched_work_if_closed(mptcp_sk(parent), sk);
180440947e13SFlorian Westphal
180581c1d029SPaolo Abeni /* when the fallback subflow closes the rx side, trigger a 'dummy'
180681c1d029SPaolo Abeni * ingress data fin, so that the msk state will follow along
180781c1d029SPaolo Abeni */
180881c1d029SPaolo Abeni if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
180981c1d029SPaolo Abeni mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
181081c1d029SPaolo Abeni mptcp_schedule_work(parent);
1811648ef4b8SMat Martineau }
1812648ef4b8SMat Martineau
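/* force-close all the unaccepted msk sockets still sitting in the accept
 * queue of the listener's first subflow, carefully working around the lock
 * ordering constraints between the listener and the pending msks
 */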
mptcp_subflow_queue_clean(struct sock * listener_sk,struct sock * listener_ssk)18132a6a870eSPaolo Abeni void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
18142a6a870eSPaolo Abeni {
18152a6a870eSPaolo Abeni struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
1816511b90e3SPaolo Abeni struct request_sock *req, *head, *tail;
18172a6a870eSPaolo Abeni struct mptcp_subflow_context *subflow;
1818511b90e3SPaolo Abeni struct sock *sk, *ssk;
18192a6a870eSPaolo Abeni
1820511b90e3SPaolo Abeni /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
1821511b90e3SPaolo Abeni * Splice the req list, so that accept() cannot reach the pending ssk after
1822511b90e3SPaolo Abeni * the listener socket is released below.
1823511b90e3SPaolo Abeni */
1824511b90e3SPaolo Abeni spin_lock_bh(&queue->rskq_lock);
1825511b90e3SPaolo Abeni head = queue->rskq_accept_head;
1826511b90e3SPaolo Abeni tail = queue->rskq_accept_tail;
1827511b90e3SPaolo Abeni queue->rskq_accept_head = NULL;
1828511b90e3SPaolo Abeni queue->rskq_accept_tail = NULL;
18292a6a870eSPaolo Abeni spin_unlock_bh(&queue->rskq_lock);
1830511b90e3SPaolo Abeni
18312a6a870eSPaolo Abeni if (!head)
18322a6a870eSPaolo Abeni return;
18332a6a870eSPaolo Abeni
18342a6a870eSPaolo Abeni /* can't acquire the msk socket lock under the subflow one,
18352a6a870eSPaolo Abeni * or it will cause an ABBA deadlock
18362a6a870eSPaolo Abeni */
18372a6a870eSPaolo Abeni release_sock(listener_ssk);
18382a6a870eSPaolo Abeni
1839511b90e3SPaolo Abeni for (req = head; req; req = req->dl_next) {
1840511b90e3SPaolo Abeni ssk = req->sk;
1841511b90e3SPaolo Abeni if (!sk_is_mptcp(ssk))
1842511b90e3SPaolo Abeni continue;
1843511b90e3SPaolo Abeni
1844511b90e3SPaolo Abeni subflow = mptcp_subflow_ctx(ssk);
1845511b90e3SPaolo Abeni if (!subflow || !subflow->conn)
1846511b90e3SPaolo Abeni continue;
1847511b90e3SPaolo Abeni
1848511b90e3SPaolo Abeni sk = subflow->conn;
1849511b90e3SPaolo Abeni sock_hold(sk);
18502a6a870eSPaolo Abeni
18512a6a870eSPaolo Abeni lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
185263740448SPaolo Abeni __mptcp_unaccepted_force_close(sk);
18532a6a870eSPaolo Abeni release_sock(sk);
18542a6a870eSPaolo Abeni
18552a6a870eSPaolo Abeni /* lockdep will report a false positive ABBA deadlock
18562a6a870eSPaolo Abeni * between cancel_work_sync and the listener socket.
18572a6a870eSPaolo Abeni * The involved locks belong to different sockets WRT
18582a6a870eSPaolo Abeni * the existing AB chain.
18592a6a870eSPaolo Abeni * Using a per socket key is problematic as key
18602a6a870eSPaolo Abeni * deregistration requires process context and must be
18612a6a870eSPaolo Abeni * performed at socket disposal time, in atomic
18622a6a870eSPaolo Abeni * context.
18632a6a870eSPaolo Abeni * Just tell lockdep to consider the listener socket
18642a6a870eSPaolo Abeni * released here.
18652a6a870eSPaolo Abeni */
18662a6a870eSPaolo Abeni mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
18672a6a870eSPaolo Abeni mptcp_cancel_work(sk);
18682a6a870eSPaolo Abeni mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
18692a6a870eSPaolo Abeni
18702a6a870eSPaolo Abeni sock_put(sk);
18712a6a870eSPaolo Abeni }
18722a6a870eSPaolo Abeni
18732a6a870eSPaolo Abeni /* we are still under the listener msk socket lock */
18742a6a870eSPaolo Abeni lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
1875511b90e3SPaolo Abeni
1876511b90e3SPaolo Abeni /* restore the listener queue, to let the TCP code clean it up */
1877511b90e3SPaolo Abeni spin_lock_bh(&queue->rskq_lock);
1878511b90e3SPaolo Abeni WARN_ON_ONCE(queue->rskq_accept_head);
1879511b90e3SPaolo Abeni queue->rskq_accept_head = head;
1880511b90e3SPaolo Abeni queue->rskq_accept_tail = tail;
1881511b90e3SPaolo Abeni spin_unlock_bh(&queue->rskq_lock);
18822a6a870eSPaolo Abeni }
18832a6a870eSPaolo Abeni
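/* ULP init hook: attach a subflow context to a kernel TCP socket and divert
 * the relevant sk callbacks; attaching the ULP to user sockets is not
 * allowed
 */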
subflow_ulp_init(struct sock * sk)18842303f994SPeter Krystad static int subflow_ulp_init(struct sock *sk)
18852303f994SPeter Krystad {
1886cec37a6eSPeter Krystad struct inet_connection_sock *icsk = inet_csk(sk);
18872303f994SPeter Krystad struct mptcp_subflow_context *ctx;
18882303f994SPeter Krystad struct tcp_sock *tp = tcp_sk(sk);
18892303f994SPeter Krystad int err = 0;
18902303f994SPeter Krystad
18912303f994SPeter Krystad /* disallow attaching ULP to a socket unless it has been
18922303f994SPeter Krystad * created with sock_create_kern()
18932303f994SPeter Krystad */
18942303f994SPeter Krystad if (!sk->sk_kern_sock) {
18952303f994SPeter Krystad err = -EOPNOTSUPP;
18962303f994SPeter Krystad goto out;
18972303f994SPeter Krystad }
18982303f994SPeter Krystad
18992303f994SPeter Krystad ctx = subflow_create_ctx(sk, GFP_KERNEL);
19002303f994SPeter Krystad if (!ctx) {
19012303f994SPeter Krystad err = -ENOMEM;
19022303f994SPeter Krystad goto out;
19032303f994SPeter Krystad }
19042303f994SPeter Krystad
19053d2e1b82SMatthieu Baerts (NGI0) pr_debug("subflow=%p, family=%d\n", ctx, sk->sk_family);
19062303f994SPeter Krystad
19072303f994SPeter Krystad tp->is_mptcp = 1;
1908cec37a6eSPeter Krystad ctx->icsk_af_ops = icsk->icsk_af_ops;
1909cec37a6eSPeter Krystad icsk->icsk_af_ops = subflow_default_af_ops(sk);
1910648ef4b8SMat Martineau ctx->tcp_state_change = sk->sk_state_change;
191115cc1045SPaolo Abeni ctx->tcp_error_report = sk->sk_error_report;
1912952382c6SFlorian Westphal
1913952382c6SFlorian Westphal WARN_ON_ONCE(sk->sk_data_ready != sock_def_readable);
1914952382c6SFlorian Westphal WARN_ON_ONCE(sk->sk_write_space != sk_stream_write_space);
1915952382c6SFlorian Westphal
1916648ef4b8SMat Martineau sk->sk_data_ready = subflow_data_ready;
1917648ef4b8SMat Martineau sk->sk_write_space = subflow_write_space;
1918648ef4b8SMat Martineau sk->sk_state_change = subflow_state_change;
191915cc1045SPaolo Abeni sk->sk_error_report = subflow_error_report;
19202303f994SPeter Krystad out:
19212303f994SPeter Krystad return err;
19222303f994SPeter Krystad }
19232303f994SPeter Krystad
subflow_ulp_release(struct sock * ssk)1924e16163b6SPaolo Abeni static void subflow_ulp_release(struct sock *ssk)
19252303f994SPeter Krystad {
1926e16163b6SPaolo Abeni struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
1927e16163b6SPaolo Abeni bool release = true;
1928e16163b6SPaolo Abeni struct sock *sk;
19292303f994SPeter Krystad
19302303f994SPeter Krystad if (!ctx)
19312303f994SPeter Krystad return;
19322303f994SPeter Krystad
1933e16163b6SPaolo Abeni sk = ctx->conn;
1934e16163b6SPaolo Abeni if (sk) {
1935e16163b6SPaolo Abeni /* if the msk has been orphaned, keep the ctx
19360597d0f8SPaolo Abeni * alive; it will be freed by __mptcp_close_ssk()
19370597d0f8SPaolo Abeni * when the subflow is still unaccepted
1938e16163b6SPaolo Abeni */
19390597d0f8SPaolo Abeni release = ctx->disposable || list_empty(&ctx->node);
1940b6985b9bSPaolo Abeni
1941b6985b9bSPaolo Abeni /* inet_child_forget() does not call sk_state_change(),
1942b6985b9bSPaolo Abeni * explicitly trigger the socket close machinery
1943b6985b9bSPaolo Abeni */
1944b6985b9bSPaolo Abeni if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
1945b6985b9bSPaolo Abeni &mptcp_sk(sk)->flags))
1946b6985b9bSPaolo Abeni mptcp_schedule_work(sk);
1947e16163b6SPaolo Abeni sock_put(sk);
1948e16163b6SPaolo Abeni }
194979c0949eSPeter Krystad
1950b19bc294SPaolo Abeni mptcp_subflow_ops_undo_override(ssk);
1951e16163b6SPaolo Abeni if (release)
19522303f994SPeter Krystad kfree_rcu(ctx, rcu);
19532303f994SPeter Krystad }
19542303f994SPeter Krystad
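/* ULP clone hook, invoked at 3rd-ack time: attach a new subflow context to
 * the child socket, inheriting the MPC/MPJ state from the request socket, or
 * fall back to plain TCP when the request carried no MPTCP options
 */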
subflow_ulp_clone(const struct request_sock * req,struct sock * newsk,const gfp_t priority)1955cec37a6eSPeter Krystad static void subflow_ulp_clone(const struct request_sock *req,
1956cec37a6eSPeter Krystad struct sock *newsk,
1957cec37a6eSPeter Krystad const gfp_t priority)
1958cec37a6eSPeter Krystad {
1959cec37a6eSPeter Krystad struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
1960cec37a6eSPeter Krystad struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
1961cec37a6eSPeter Krystad struct mptcp_subflow_context *new_ctx;
1962cec37a6eSPeter Krystad
1963f296234cSPeter Krystad if (!tcp_rsk(req)->is_mptcp ||
1964f296234cSPeter Krystad (!subflow_req->mp_capable && !subflow_req->mp_join)) {
1965648ef4b8SMat Martineau subflow_ulp_fallback(newsk, old_ctx);
1966cec37a6eSPeter Krystad return;
1967cec37a6eSPeter Krystad }
1968cec37a6eSPeter Krystad
1969cec37a6eSPeter Krystad new_ctx = subflow_create_ctx(newsk, priority);
1970edc7e489SMat Martineau if (!new_ctx) {
1971648ef4b8SMat Martineau subflow_ulp_fallback(newsk, old_ctx);
1972cec37a6eSPeter Krystad return;
1973cec37a6eSPeter Krystad }
1974cec37a6eSPeter Krystad
1975cec37a6eSPeter Krystad new_ctx->conn_finished = 1;
1976cec37a6eSPeter Krystad new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
1977648ef4b8SMat Martineau new_ctx->tcp_state_change = old_ctx->tcp_state_change;
197815cc1045SPaolo Abeni new_ctx->tcp_error_report = old_ctx->tcp_error_report;
197958b09919SPaolo Abeni new_ctx->rel_write_seq = 1;
198058b09919SPaolo Abeni new_ctx->tcp_sock = newsk;
198158b09919SPaolo Abeni
1982f296234cSPeter Krystad if (subflow_req->mp_capable) {
1983f296234cSPeter Krystad /* see comments in subflow_syn_recv_sock(), MPTCP connection
1984f296234cSPeter Krystad * is fully established only after we receive the remote key
1985f296234cSPeter Krystad */
1986cec37a6eSPeter Krystad new_ctx->mp_capable = 1;
1987cec37a6eSPeter Krystad new_ctx->local_key = subflow_req->local_key;
198879c0949eSPeter Krystad new_ctx->token = subflow_req->token;
1989648ef4b8SMat Martineau new_ctx->ssn_offset = subflow_req->ssn_offset;
1990648ef4b8SMat Martineau new_ctx->idsn = subflow_req->idsn;
19914cf86ae8SPaolo Abeni
19924cf86ae8SPaolo Abeni /* this is the first subflow, id is always 0 */
1993ba2cf922SPaolo Abeni subflow_set_local_id(new_ctx, 0);
1994f296234cSPeter Krystad } else if (subflow_req->mp_join) {
1995ec3edaa7SPeter Krystad new_ctx->ssn_offset = subflow_req->ssn_offset;
1996f296234cSPeter Krystad new_ctx->mp_join = 1;
1997f296234cSPeter Krystad new_ctx->fully_established = 1;
1998b3ea6b27SPaolo Abeni new_ctx->remote_key_valid = 1;
1999f296234cSPeter Krystad new_ctx->backup = subflow_req->backup;
20008ed3e34cSMatthieu Baerts (NGI0) new_ctx->request_bkup = subflow_req->request_bkup;
20012dba5774SPaolo Abeni WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
2002f296234cSPeter Krystad new_ctx->token = subflow_req->token;
2003f296234cSPeter Krystad new_ctx->thmac = subflow_req->thmac;
20044cf86ae8SPaolo Abeni
20054cf86ae8SPaolo Abeni /* the subflow req id is valid, fetched via subflow_check_req()
20064cf86ae8SPaolo Abeni * and subflow_token_join_request()
20074cf86ae8SPaolo Abeni */
20084cf86ae8SPaolo Abeni subflow_set_local_id(new_ctx, subflow_req->local_id);
2009f296234cSPeter Krystad }
2010cec37a6eSPeter Krystad }
2011cec37a6eSPeter Krystad
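/* release_cb override: run the delegated actions scheduled while the subflow
 * socket was owned, then invoke the plain TCP release callback
 */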
tcp_release_cb_override(struct sock * ssk)2012b19bc294SPaolo Abeni static void tcp_release_cb_override(struct sock *ssk)
2013b19bc294SPaolo Abeni {
2014b19bc294SPaolo Abeni struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
2015a5efdbceSPaolo Abeni long status;
2016b19bc294SPaolo Abeni
2017a5efdbceSPaolo Abeni /* process and clear all the pending actions, but leave the subflow in
2018a5efdbceSPaolo Abeni * the napi queue. To respect locking, only the same CPU that originated
2019a5efdbceSPaolo Abeni * the action can touch the list. mptcp_napi_poll will take care of it.
2020a5efdbceSPaolo Abeni */
2021a5efdbceSPaolo Abeni status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0);
2022a5efdbceSPaolo Abeni if (status)
2023a5efdbceSPaolo Abeni mptcp_subflow_process_delegated(ssk, status);
2024b19bc294SPaolo Abeni
2025b19bc294SPaolo Abeni tcp_release_cb(ssk);
2026b19bc294SPaolo Abeni }
2027b19bc294SPaolo Abeni
tcp_abort_override(struct sock * ssk,int err)2028c25546caSPaolo Abeni static int tcp_abort_override(struct sock *ssk, int err)
2029c25546caSPaolo Abeni {
2030c25546caSPaolo Abeni /* closing a listener subflow requires a great deal of care.
2031c25546caSPaolo Abeni * keep it simple and just prevent such an operation
2032c25546caSPaolo Abeni */
2033c25546caSPaolo Abeni if (inet_sk_state_load(ssk) == TCP_LISTEN)
2034c25546caSPaolo Abeni return -EINVAL;
2035c25546caSPaolo Abeni
2036c25546caSPaolo Abeni return tcp_abort(ssk, err);
2037c25546caSPaolo Abeni }
2038c25546caSPaolo Abeni
20392303f994SPeter Krystad static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
20402303f994SPeter Krystad .name = "mptcp",
20412303f994SPeter Krystad .owner = THIS_MODULE,
20422303f994SPeter Krystad .init = subflow_ulp_init,
20432303f994SPeter Krystad .release = subflow_ulp_release,
2044cec37a6eSPeter Krystad .clone = subflow_ulp_clone,
20452303f994SPeter Krystad };
20462303f994SPeter Krystad
subflow_ops_init(struct request_sock_ops * subflow_ops)2047cec37a6eSPeter Krystad static int subflow_ops_init(struct request_sock_ops *subflow_ops)
2048cec37a6eSPeter Krystad {
2049cec37a6eSPeter Krystad subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
2050cec37a6eSPeter Krystad
2051cec37a6eSPeter Krystad subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
2052cec37a6eSPeter Krystad subflow_ops->obj_size, 0,
2053cec37a6eSPeter Krystad SLAB_ACCOUNT |
2054cec37a6eSPeter Krystad SLAB_TYPESAFE_BY_RCU,
2055cec37a6eSPeter Krystad NULL);
2056cec37a6eSPeter Krystad if (!subflow_ops->slab)
2057cec37a6eSPeter Krystad return -ENOMEM;
2058cec37a6eSPeter Krystad
2059cec37a6eSPeter Krystad return 0;
2060cec37a6eSPeter Krystad }
2061cec37a6eSPeter Krystad
void __init mptcp_subflow_init(void)
{
	mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops;
	mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4";
	mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor;

	if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow v4 request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
	subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_specific.rebuild_header = subflow_rebuild_header;

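	/* The overridden proto replaces release_cb and diag_destroy with the
	 * MPTCP-aware wrappers defined above; it is installed on each subflow
	 * socket in place of plain tcp_prot.
	 */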
	tcp_prot_override = tcp_prot;
	tcp_prot_override.release_cb = tcp_release_cb_override;
	tcp_prot_override.diag_destroy = tcp_abort_override;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	/* In struct mptcp_subflow_request_sock, we assume the TCP request sock
	 * structures for v4 and v6 have the same size. It should not change in
	 * the future, but better to make sure we are warned if that is no
	 * longer the case.
	 */
	BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock));

	mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops;
	mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6";
	mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor;

	if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow v6 request sock ops\n");

	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
	subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_v6_specific.rebuild_header = subflow_v6_rebuild_header;

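	/* Ops for subflows carried over IPv4-mapped IPv6 sockets: the packets
	 * travel over IPv4, so the transmit-side helpers come from
	 * ipv4_specific.
	 */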
	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
	subflow_v6m_specific.rebuild_header = subflow_rebuild_header;

	tcpv6_prot_override = tcpv6_prot;
	tcpv6_prot_override.release_cb = tcp_release_cb_override;
	tcpv6_prot_override.diag_destroy = tcp_abort_override;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}