protocol.c (db6da59cf27b5661ced03754ae0550f8914eda9e) protocol.c (e1d001fa5b477c4da46a29be1fcece91db7c7c6f)
1// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
7#define pr_fmt(fmt) "MPTCP: " fmt
8

--- 76 unchanged lines hidden (view full) ---

85 struct sock *sk = (struct sock *)msk;
86 struct socket *ssock;
87 int err;
88
89 err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
90 if (err)
91 return err;
92
1// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
7#define pr_fmt(fmt) "MPTCP: " fmt
8

--- 76 unchanged lines hidden (view full) ---

85 struct sock *sk = (struct sock *)msk;
86 struct socket *ssock;
87 int err;
88
89 err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
90 if (err)
91 return err;
92
93 msk->first = ssock->sk;
94 msk->subflow = ssock;
93 WRITE_ONCE(msk->first, ssock->sk);
94 WRITE_ONCE(msk->subflow, ssock);
95 subflow = mptcp_subflow_ctx(ssock->sk);
96 list_add(&subflow->node, &msk->conn_list);
97 sock_hold(ssock->sk);
98 subflow->request_mptcp = 1;
99
100 /* This is the first subflow, always with id 0 */
101 subflow->local_id_valid = 1;
102 mptcp_sock_graft(msk->first, sk->sk_socket);

--- 495 unchanged lines hidden (view full) ---

598 * not caught up, do nothing and let the recv code send DATA_ACK
599 * when catching up.
600 */
601
602 if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) {
603 WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
604 WRITE_ONCE(msk->rcv_data_fin, 0);
605
95 subflow = mptcp_subflow_ctx(ssock->sk);
96 list_add(&subflow->node, &msk->conn_list);
97 sock_hold(ssock->sk);
98 subflow->request_mptcp = 1;
99
100 /* This is the first subflow, always with id 0 */
101 subflow->local_id_valid = 1;
102 mptcp_sock_graft(msk->first, sk->sk_socket);

--- 495 unchanged lines hidden (view full) ---

598 * not caught up, do nothing and let the recv code send DATA_ACK
599 * when catching up.
600 */
601
602 if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) {
603 WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
604 WRITE_ONCE(msk->rcv_data_fin, 0);
605
606 sk->sk_shutdown |= RCV_SHUTDOWN;
606 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
607 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
608
609 switch (sk->sk_state) {
610 case TCP_ESTABLISHED:
611 inet_sk_state_store(sk, TCP_CLOSE_WAIT);
612 break;
613 case TCP_FIN_WAIT1:
614 inet_sk_state_store(sk, TCP_CLOSING);

--- 205 unchanged lines hidden (view full) ---

820 /* Wake-up the reader only for in-sequence data */
821 mptcp_data_lock(sk);
822 if (move_skbs_to_msk(msk, ssk))
823 sk->sk_data_ready(sk);
824
825 mptcp_data_unlock(sk);
826}
827
607 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
608
609 switch (sk->sk_state) {
610 case TCP_ESTABLISHED:
611 inet_sk_state_store(sk, TCP_CLOSE_WAIT);
612 break;
613 case TCP_FIN_WAIT1:
614 inet_sk_state_store(sk, TCP_CLOSING);

--- 205 unchanged lines hidden (view full) ---

820 /* Wake-up the reader only for in-sequence data */
821 mptcp_data_lock(sk);
822 if (move_skbs_to_msk(msk, ssk))
823 sk->sk_data_ready(sk);
824
825 mptcp_data_unlock(sk);
826}
827
828static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
829{
830 mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
831 WRITE_ONCE(msk->allow_infinite_fallback, false);
832 mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
833}
834
828static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
829{
830 struct sock *sk = (struct sock *)msk;
831
832 if (sk->sk_state != TCP_ESTABLISHED)
833 return false;
834
835 /* attach to msk socket only after we are sure we will deal with it
836 * at close time
837 */
838 if (sk->sk_socket && !ssk->sk_socket)
839 mptcp_sock_graft(ssk, sk->sk_socket);
840
841 mptcp_sockopt_sync_locked(msk, ssk);
835static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
836{
837 struct sock *sk = (struct sock *)msk;
838
839 if (sk->sk_state != TCP_ESTABLISHED)
840 return false;
841
842 /* attach to msk socket only after we are sure we will deal with it
843 * at close time
844 */
845 if (sk->sk_socket && !ssk->sk_socket)
846 mptcp_sock_graft(ssk, sk->sk_socket);
847
848 mptcp_sockopt_sync_locked(msk, ssk);
849 mptcp_subflow_joined(msk, ssk);
842 return true;
843}
844
845static void __mptcp_flush_join_list(struct sock *sk)
846{
847 struct mptcp_subflow_context *tmp, *subflow;
848 struct mptcp_sock *msk = mptcp_sk(sk);
849

--- 55 unchanged lines hidden (view full) ---

905 receivers += !subflow->rx_eof;
906 if (receivers)
907 return;
908
909 if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
910 /* hopefully temporary hack: propagate shutdown status
911 * to msk, when all subflows agree on it
912 */
850 return true;
851}
852
853static void __mptcp_flush_join_list(struct sock *sk)
854{
855 struct mptcp_subflow_context *tmp, *subflow;
856 struct mptcp_sock *msk = mptcp_sk(sk);
857

--- 55 unchanged lines hidden (view full) ---

913 receivers += !subflow->rx_eof;
914 if (receivers)
915 return;
916
917 if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
918 /* hopefully temporary hack: propagate shutdown status
919 * to msk, when all subflows agree on it
920 */
913 sk->sk_shutdown |= RCV_SHUTDOWN;
921 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
914
915 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
916 sk->sk_data_ready(sk);
917 }
918
919 switch (sk->sk_state) {
920 case TCP_ESTABLISHED:
921 inet_sk_state_store(sk, TCP_CLOSE_WAIT);

--- 775 unchanged lines hidden (view full) ---

1697 }
1698 if (!msk->first)
1699 return -EINVAL;
1700
1701 ssk = msk->first;
1702
1703 lock_sock(ssk);
1704 msg->msg_flags |= MSG_DONTWAIT;
922
923 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
924 sk->sk_data_ready(sk);
925 }
926
927 switch (sk->sk_state) {
928 case TCP_ESTABLISHED:
929 inet_sk_state_store(sk, TCP_CLOSE_WAIT);

--- 775 unchanged lines hidden (view full) ---

1705 }
1706 if (!msk->first)
1707 return -EINVAL;
1708
1709 ssk = msk->first;
1710
1711 lock_sock(ssk);
1712 msg->msg_flags |= MSG_DONTWAIT;
1705 msk->connect_flags = O_NONBLOCK;
1706 msk->fastopening = 1;
1707 ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
1708 msk->fastopening = 0;
1709 msg->msg_flags = saved_flags;
1710 release_sock(ssk);
1711
1712 /* do the blocking bits of inet_stream_connect outside the ssk socket lock */
1713 if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {

--- 564 unchanged lines hidden (view full) ---

2278 /* use backup only if there are no progresses anywhere */
2279 return min_stale_count > 1 ? backup : NULL;
2280}
2281
2282static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
2283{
2284 if (msk->subflow) {
2285 iput(SOCK_INODE(msk->subflow));
1713 msk->fastopening = 1;
1714 ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
1715 msk->fastopening = 0;
1716 msg->msg_flags = saved_flags;
1717 release_sock(ssk);
1718
1719 /* do the blocking bits of inet_stream_connect outside the ssk socket lock */
1720 if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {

--- 564 unchanged lines hidden (view full) ---

2285 /* use backup only if there are no progresses anywhere */
2286 return min_stale_count > 1 ? backup : NULL;
2287}
2288
2289static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
2290{
2291 if (msk->subflow) {
2292 iput(SOCK_INODE(msk->subflow));
2286 msk->subflow = NULL;
2293 WRITE_ONCE(msk->subflow, NULL);
2287 }
2288}
2289
2290bool __mptcp_retransmit_pending_data(struct sock *sk)
2291{
2292 struct mptcp_data_frag *cur, *rtx_head;
2293 struct mptcp_sock *msk = mptcp_sk(sk);
2294

--- 120 unchanged lines hidden (view full) ---

2415 }
2416
2417out_release:
2418 release_sock(ssk);
2419
2420 sock_put(ssk);
2421
2422 if (ssk == msk->first)
2294 }
2295}
2296
2297bool __mptcp_retransmit_pending_data(struct sock *sk)
2298{
2299 struct mptcp_data_frag *cur, *rtx_head;
2300 struct mptcp_sock *msk = mptcp_sk(sk);
2301

--- 120 unchanged lines hidden (view full) ---

2422 }
2423
2424out_release:
2425 release_sock(ssk);
2426
2427 sock_put(ssk);
2428
2429 if (ssk == msk->first)
2423 msk->first = NULL;
2430 WRITE_ONCE(msk->first, NULL);
2424
2425out:
2426 if (ssk == msk->last_snd)
2427 msk->last_snd = NULL;
2428
2429 if (need_push)
2430 __mptcp_push_pending(sk, 0);
2431}

--- 90 unchanged lines hidden (view full) ---

2522 break;
2523 case TCP_CLOSE:
2524 return;
2525 default:
2526 WRITE_ONCE(sk->sk_err, ECONNRESET);
2527 }
2528
2529 inet_sk_state_store(sk, TCP_CLOSE);
2431
2432out:
2433 if (ssk == msk->last_snd)
2434 msk->last_snd = NULL;
2435
2436 if (need_push)
2437 __mptcp_push_pending(sk, 0);
2438}

--- 90 unchanged lines hidden (view full) ---

2529 break;
2530 case TCP_CLOSE:
2531 return;
2532 default:
2533 WRITE_ONCE(sk->sk_err, ECONNRESET);
2534 }
2535
2536 inet_sk_state_store(sk, TCP_CLOSE);
2530 sk->sk_shutdown = SHUTDOWN_MASK;
2537 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
2531 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
2532 set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
2533
2534 /* the calling mptcp_worker will properly destroy the socket */
2535 if (sock_flag(sk, SOCK_DEAD))
2536 return;
2537
2538 sk->sk_state_change(sk);

--- 177 unchanged lines hidden (view full) ---

2716 INIT_WORK(&msk->work, mptcp_worker);
2717 __skb_queue_head_init(&msk->receive_queue);
2718 msk->out_of_order_queue = RB_ROOT;
2719 msk->first_pending = NULL;
2720 msk->rmem_fwd_alloc = 0;
2721 WRITE_ONCE(msk->rmem_released, 0);
2722 msk->timer_ival = TCP_RTO_MIN;
2723
2538 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
2539 set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
2540
2541 /* the calling mptcp_worker will properly destroy the socket */
2542 if (sock_flag(sk, SOCK_DEAD))
2543 return;
2544
2545 sk->sk_state_change(sk);

--- 177 unchanged lines hidden (view full) ---

2723 INIT_WORK(&msk->work, mptcp_worker);
2724 __skb_queue_head_init(&msk->receive_queue);
2725 msk->out_of_order_queue = RB_ROOT;
2726 msk->first_pending = NULL;
2727 msk->rmem_fwd_alloc = 0;
2728 WRITE_ONCE(msk->rmem_released, 0);
2729 msk->timer_ival = TCP_RTO_MIN;
2730
2724 msk->first = NULL;
2731 WRITE_ONCE(msk->first, NULL);
2725 inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
2726 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
2727 WRITE_ONCE(msk->allow_infinite_fallback, true);
2728 msk->recovery = false;
2729
2730 mptcp_pm_data_init(msk);
2731
2732 /* re-use the csk retrans timer for MPTCP-level retrans */

--- 221 unchanged lines hidden (view full) ---

2954
2955bool __mptcp_close(struct sock *sk, long timeout)
2956{
2957 struct mptcp_subflow_context *subflow;
2958 struct mptcp_sock *msk = mptcp_sk(sk);
2959 bool do_cancel_work = false;
2960 int subflows_alive = 0;
2961
2732 inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
2733 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
2734 WRITE_ONCE(msk->allow_infinite_fallback, true);
2735 msk->recovery = false;
2736
2737 mptcp_pm_data_init(msk);
2738
2739 /* re-use the csk retrans timer for MPTCP-level retrans */

--- 221 unchanged lines hidden (view full) ---

2961
2962bool __mptcp_close(struct sock *sk, long timeout)
2963{
2964 struct mptcp_subflow_context *subflow;
2965 struct mptcp_sock *msk = mptcp_sk(sk);
2966 bool do_cancel_work = false;
2967 int subflows_alive = 0;
2968
2962 sk->sk_shutdown = SHUTDOWN_MASK;
2969 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
2963
2964 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
2965 mptcp_listen_inuse_dec(sk);
2966 inet_sk_state_store(sk, TCP_CLOSE);
2967 goto cleanup;
2968 }
2969
2970 if (mptcp_check_readable(msk) || timeout < 0) {

--- 63 unchanged lines hidden (view full) ---

3034 do_cancel_work = __mptcp_close(sk, timeout);
3035 release_sock(sk);
3036 if (do_cancel_work)
3037 mptcp_cancel_work(sk);
3038
3039 sock_put(sk);
3040}
3041
2970
2971 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
2972 mptcp_listen_inuse_dec(sk);
2973 inet_sk_state_store(sk, TCP_CLOSE);
2974 goto cleanup;
2975 }
2976
2977 if (mptcp_check_readable(msk) || timeout < 0) {

--- 63 unchanged lines hidden (view full) ---

3041 do_cancel_work = __mptcp_close(sk, timeout);
3042 release_sock(sk);
3043 if (do_cancel_work)
3044 mptcp_cancel_work(sk);
3045
3046 sock_put(sk);
3047}
3048
3042void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
3049static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
3043{
3044#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3045 const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
3046 struct ipv6_pinfo *msk6 = inet6_sk(msk);
3047
3048 msk->sk_v6_daddr = ssk->sk_v6_daddr;
3049 msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;
3050

--- 46 unchanged lines hidden (view full) ---

3097 msk->rcv_data_fin = false;
3098 msk->snd_data_fin_enable = false;
3099 msk->rcv_fastclose = false;
3100 msk->use_64bit_ack = false;
3101 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
3102 mptcp_pm_data_reset(msk);
3103 mptcp_ca_reset(sk);
3104
3050{
3051#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3052 const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
3053 struct ipv6_pinfo *msk6 = inet6_sk(msk);
3054
3055 msk->sk_v6_daddr = ssk->sk_v6_daddr;
3056 msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;
3057

--- 46 unchanged lines hidden (view full) ---

3104 msk->rcv_data_fin = false;
3105 msk->snd_data_fin_enable = false;
3106 msk->rcv_fastclose = false;
3107 msk->use_64bit_ack = false;
3108 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
3109 mptcp_pm_data_reset(msk);
3110 mptcp_ca_reset(sk);
3111
3105 sk->sk_shutdown = 0;
3112 WRITE_ONCE(sk->sk_shutdown, 0);
3106 sk_error_report(sk);
3107 return 0;
3108}
3109
3110#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3111static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
3112{
3113 unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo);
3114
3115 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
3116}
3117#endif
3118
3113 sk_error_report(sk);
3114 return 0;
3115}
3116
3117#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3118static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
3119{
3120 unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo);
3121
3122 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
3123}
3124#endif
3125
3119struct sock *mptcp_sk_clone(const struct sock *sk,
3120 const struct mptcp_options_received *mp_opt,
3121 struct request_sock *req)
3126struct sock *mptcp_sk_clone_init(const struct sock *sk,
3127 const struct mptcp_options_received *mp_opt,
3128 struct sock *ssk,
3129 struct request_sock *req)
3122{
3123 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
3124 struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
3125 struct mptcp_sock *msk;
3126
3127 if (!nsk)
3128 return NULL;
3129
3130#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3131 if (nsk->sk_family == AF_INET6)
3132 inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
3133#endif
3134
3135 __mptcp_init_sock(nsk);
3136
3137 msk = mptcp_sk(nsk);
3138 msk->local_key = subflow_req->local_key;
3139 msk->token = subflow_req->token;
3130{
3131 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
3132 struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
3133 struct mptcp_sock *msk;
3134
3135 if (!nsk)
3136 return NULL;
3137
3138#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3139 if (nsk->sk_family == AF_INET6)
3140 inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
3141#endif
3142
3143 __mptcp_init_sock(nsk);
3144
3145 msk = mptcp_sk(nsk);
3146 msk->local_key = subflow_req->local_key;
3147 msk->token = subflow_req->token;
3140 msk->subflow = NULL;
3148 WRITE_ONCE(msk->subflow, NULL);
3141 msk->in_accept_queue = 1;
3142 WRITE_ONCE(msk->fully_established, false);
3143 if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
3144 WRITE_ONCE(msk->csum_enabled, true);
3145
3146 msk->write_seq = subflow_req->idsn + 1;
3147 msk->snd_nxt = msk->write_seq;
3148 msk->snd_una = msk->write_seq;
3149 msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
3150 msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
3151
3152 sock_reset_flag(nsk, SOCK_RCU_FREE);
3149 msk->in_accept_queue = 1;
3150 WRITE_ONCE(msk->fully_established, false);
3151 if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
3152 WRITE_ONCE(msk->csum_enabled, true);
3153
3154 msk->write_seq = subflow_req->idsn + 1;
3155 msk->snd_nxt = msk->write_seq;
3156 msk->snd_una = msk->write_seq;
3157 msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
3158 msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
3159
3160 sock_reset_flag(nsk, SOCK_RCU_FREE);
3153 /* will be fully established after successful MPC subflow creation */
3154 inet_sk_state_store(nsk, TCP_SYN_RECV);
3155
3156 security_inet_csk_clone(nsk, req);
3161 security_inet_csk_clone(nsk, req);
3162
3163 /* this can't race with mptcp_close(), as the msk is
3164 * not yet exposted to user-space
3165 */
3166 inet_sk_state_store(nsk, TCP_ESTABLISHED);
3167
3168 /* The msk maintain a ref to each subflow in the connections list */
3169 WRITE_ONCE(msk->first, ssk);
3170 list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
3171 sock_hold(ssk);
3172
3173 /* new mpc subflow takes ownership of the newly
3174 * created mptcp socket
3175 */
3176 mptcp_token_accept(subflow_req, msk);
3177
3178 /* set msk addresses early to ensure mptcp_pm_get_local_id()
3179 * uses the correct data
3180 */
3181 mptcp_copy_inaddrs(nsk, ssk);
3182 mptcp_propagate_sndbuf(nsk, ssk);
3183
3184 mptcp_rcv_space_init(msk, ssk);
3157 bh_unlock_sock(nsk);
3158
3159 /* note: the newly allocated socket refcount is 2 now */
3160 return nsk;
3161}
3162
3163void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
3164{

--- 15 unchanged lines hidden (view full) ---

3180
3181static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
3182 bool kern)
3183{
3184 struct mptcp_sock *msk = mptcp_sk(sk);
3185 struct socket *listener;
3186 struct sock *newsk;
3187
3185 bh_unlock_sock(nsk);
3186
3187 /* note: the newly allocated socket refcount is 2 now */
3188 return nsk;
3189}
3190
3191void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
3192{

--- 15 unchanged lines hidden (view full) ---

3208
3209static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
3210 bool kern)
3211{
3212 struct mptcp_sock *msk = mptcp_sk(sk);
3213 struct socket *listener;
3214 struct sock *newsk;
3215
3188 listener = msk->subflow;
3216 listener = READ_ONCE(msk->subflow);
3189 if (WARN_ON_ONCE(!listener)) {
3190 *err = -EINVAL;
3191 return NULL;
3192 }
3193
3194 pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
3195 newsk = inet_csk_accept(listener->sk, flags, err, kern);
3196 if (!newsk)

--- 263 unchanged lines hidden (view full) ---

3460 pr_debug("msk=%p, subflow=%p", msk, subflow);
3461
3462 /* mptcp socket already closing? */
3463 if (!mptcp_is_fully_established(parent)) {
3464 subflow->reset_reason = MPTCP_RST_EMPTCP;
3465 return false;
3466 }
3467
3217 if (WARN_ON_ONCE(!listener)) {
3218 *err = -EINVAL;
3219 return NULL;
3220 }
3221
3222 pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
3223 newsk = inet_csk_accept(listener->sk, flags, err, kern);
3224 if (!newsk)

--- 263 unchanged lines hidden (view full) ---

3488 pr_debug("msk=%p, subflow=%p", msk, subflow);
3489
3490 /* mptcp socket already closing? */
3491 if (!mptcp_is_fully_established(parent)) {
3492 subflow->reset_reason = MPTCP_RST_EMPTCP;
3493 return false;
3494 }
3495
3468 if (!list_empty(&subflow->node))
3469 goto out;
3496 /* active subflow, already present inside the conn_list */
3497 if (!list_empty(&subflow->node)) {
3498 mptcp_subflow_joined(msk, ssk);
3499 return true;
3500 }
3470
3471 if (!mptcp_pm_allow_new_subflow(msk))
3472 goto err_prohibited;
3473
3501
3502 if (!mptcp_pm_allow_new_subflow(msk))
3503 goto err_prohibited;
3504
3474 /* active connections are already on conn_list.
3475 * If we can't acquire msk socket lock here, let the release callback
3505 /* If we can't acquire msk socket lock here, let the release callback
3476 * handle it
3477 */
3478 mptcp_data_lock(parent);
3479 if (!sock_owned_by_user(parent)) {
3480 ret = __mptcp_finish_join(msk, ssk);
3481 if (ret) {
3482 sock_hold(ssk);
3483 list_add_tail(&subflow->node, &msk->conn_list);

--- 6 unchanged lines hidden (view full) ---

3490 mptcp_data_unlock(parent);
3491
3492 if (!ret) {
3493err_prohibited:
3494 subflow->reset_reason = MPTCP_RST_EPROHIBIT;
3495 return false;
3496 }
3497
3506 * handle it
3507 */
3508 mptcp_data_lock(parent);
3509 if (!sock_owned_by_user(parent)) {
3510 ret = __mptcp_finish_join(msk, ssk);
3511 if (ret) {
3512 sock_hold(ssk);
3513 list_add_tail(&subflow->node, &msk->conn_list);

--- 6 unchanged lines hidden (view full) ---

3520 mptcp_data_unlock(parent);
3521
3522 if (!ret) {
3523err_prohibited:
3524 subflow->reset_reason = MPTCP_RST_EPROHIBIT;
3525 return false;
3526 }
3527
3498 subflow->map_seq = READ_ONCE(msk->ack_seq);
3499 WRITE_ONCE(msk->allow_infinite_fallback, false);
3500
3501out:
3502 mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
3503 return true;
3504}
3505
3506static void mptcp_shutdown(struct sock *sk, int how)
3507{
3508 pr_debug("sk=%p, how=%d", sk, how);
3509
3510 if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk))

--- 29 unchanged lines hidden (view full) ---

3540 delta += READ_ONCE(tp->write_seq) - tp->snd_una;
3541 }
3542 if (delta > INT_MAX)
3543 delta = INT_MAX;
3544
3545 return (int)delta;
3546}
3547
3528 return true;
3529}
3530
3531static void mptcp_shutdown(struct sock *sk, int how)
3532{
3533 pr_debug("sk=%p, how=%d", sk, how);
3534
3535 if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk))

--- 29 unchanged lines hidden (view full) ---

3565 delta += READ_ONCE(tp->write_seq) - tp->snd_una;
3566 }
3567 if (delta > INT_MAX)
3568 delta = INT_MAX;
3569
3570 return (int)delta;
3571}
3572
3548static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3573static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
3549{
3550 struct mptcp_sock *msk = mptcp_sk(sk);
3551 bool slow;
3574{
3575 struct mptcp_sock *msk = mptcp_sk(sk);
3576 bool slow;
3552 int answ;
3553
3554 switch (cmd) {
3555 case SIOCINQ:
3556 if (sk->sk_state == TCP_LISTEN)
3557 return -EINVAL;
3558
3559 lock_sock(sk);
3560 __mptcp_move_skbs(msk);
3577
3578 switch (cmd) {
3579 case SIOCINQ:
3580 if (sk->sk_state == TCP_LISTEN)
3581 return -EINVAL;
3582
3583 lock_sock(sk);
3584 __mptcp_move_skbs(msk);
3561 answ = mptcp_inq_hint(sk);
3585 *karg = mptcp_inq_hint(sk);
3562 release_sock(sk);
3563 break;
3564 case SIOCOUTQ:
3565 slow = lock_sock_fast(sk);
3586 release_sock(sk);
3587 break;
3588 case SIOCOUTQ:
3589 slow = lock_sock_fast(sk);
3566 answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
3590 *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
3567 unlock_sock_fast(sk, slow);
3568 break;
3569 case SIOCOUTQNSD:
3570 slow = lock_sock_fast(sk);
3591 unlock_sock_fast(sk, slow);
3592 break;
3593 case SIOCOUTQNSD:
3594 slow = lock_sock_fast(sk);
3571 answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
3595 *karg = mptcp_ioctl_outq(msk, msk->snd_nxt);
3572 unlock_sock_fast(sk, slow);
3573 break;
3574 default:
3575 return -ENOIOCTLCMD;
3576 }
3577
3596 unlock_sock_fast(sk, slow);
3597 break;
3598 default:
3599 return -ENOIOCTLCMD;
3600 }
3601
3578 return put_user(answ, (int __user *)arg);
3602 return 0;
3579}
3580
3581static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
3582 struct mptcp_subflow_context *subflow)
3583{
3584 subflow->request_mptcp = 0;
3585 __mptcp_do_fallback(msk);
3586}

--- 25 unchanged lines hidden (view full) ---

3612 }
3613 if (likely(!__mptcp_check_fallback(msk)))
3614 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
3615
3616 /* if reaching here via the fastopen/sendmsg path, the caller already
3617 * acquired the subflow socket lock, too.
3618 */
3619 if (msk->fastopening)
3603}
3604
3605static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
3606 struct mptcp_subflow_context *subflow)
3607{
3608 subflow->request_mptcp = 0;
3609 __mptcp_do_fallback(msk);
3610}

--- 25 unchanged lines hidden (view full) ---

3636 }
3637 if (likely(!__mptcp_check_fallback(msk)))
3638 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
3639
3640 /* if reaching here via the fastopen/sendmsg path, the caller already
3641 * acquired the subflow socket lock, too.
3642 */
3643 if (msk->fastopening)
3620 err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
3644 err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
3621 else
3645 else
3622 err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
3646 err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
3623 inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
3624
3625 /* on successful connect, the msk state will be moved to established by
3626 * subflow_finish_connect()
3627 */
3628 if (unlikely(err && err != -EINPROGRESS)) {
3629 inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
3630 return err;
3631 }
3632
3633 mptcp_copy_inaddrs(sk, ssock->sk);
3634
3647 inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
3648
3649 /* on successful connect, the msk state will be moved to established by
3650 * subflow_finish_connect()
3651 */
3652 if (unlikely(err && err != -EINPROGRESS)) {
3653 inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
3654 return err;
3655 }
3656
3657 mptcp_copy_inaddrs(sk, ssock->sk);
3658
3635 /* unblocking connect, mptcp-level inet_stream_connect will error out
3636 * without changing the socket state, update it here.
3659 /* silence EINPROGRESS and let the caller inet_stream_connect
3660 * handle the connection in progress
3637 */
3661 */
3638 if (err == -EINPROGRESS)
3639 sk->sk_socket->state = ssock->state;
3640 return err;
3662 return 0;
3641}
3642
3643static struct proto mptcp_prot = {
3644 .name = "MPTCP",
3645 .owner = THIS_MODULE,
3646 .init = mptcp_init_sock,
3647 .connect = mptcp_connect,
3648 .disconnect = mptcp_disconnect,

--- 42 unchanged lines hidden (view full) ---

3691 if (!err)
3692 mptcp_copy_inaddrs(sock->sk, ssock->sk);
3693
3694unlock:
3695 release_sock(sock->sk);
3696 return err;
3697}
3698
3663}
3664
3665static struct proto mptcp_prot = {
3666 .name = "MPTCP",
3667 .owner = THIS_MODULE,
3668 .init = mptcp_init_sock,
3669 .connect = mptcp_connect,
3670 .disconnect = mptcp_disconnect,

--- 42 unchanged lines hidden (view full) ---

3713 if (!err)
3714 mptcp_copy_inaddrs(sock->sk, ssock->sk);
3715
3716unlock:
3717 release_sock(sock->sk);
3718 return err;
3719}
3720
3699static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
3700 int addr_len, int flags)
3701{
3702 int ret;
3703
3704 lock_sock(sock->sk);
3705 mptcp_sk(sock->sk)->connect_flags = flags;
3706 ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
3707 release_sock(sock->sk);
3708 return ret;
3709}
3710
3711static int mptcp_listen(struct socket *sock, int backlog)
3712{
3713 struct mptcp_sock *msk = mptcp_sk(sock->sk);
3714 struct sock *sk = sock->sk;
3715 struct socket *ssock;
3716 int err;
3717
3718 pr_debug("msk=%p", msk);

--- 23 unchanged lines hidden (view full) ---

3742 return err;
3743}
3744
3745static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
3746 int flags, bool kern)
3747{
3748 struct mptcp_sock *msk = mptcp_sk(sock->sk);
3749 struct socket *ssock;
3721static int mptcp_listen(struct socket *sock, int backlog)
3722{
3723 struct mptcp_sock *msk = mptcp_sk(sock->sk);
3724 struct sock *sk = sock->sk;
3725 struct socket *ssock;
3726 int err;
3727
3728 pr_debug("msk=%p", msk);

--- 23 unchanged lines hidden (view full) ---

3752 return err;
3753}
3754
3755static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
3756 int flags, bool kern)
3757{
3758 struct mptcp_sock *msk = mptcp_sk(sock->sk);
3759 struct socket *ssock;
3760 struct sock *newsk;
3750 int err;
3751
3752 pr_debug("msk=%p", msk);
3753
3761 int err;
3762
3763 pr_debug("msk=%p", msk);
3764
3754 /* buggy applications can call accept on socket states other then LISTEN
3765 /* Buggy applications can call accept on socket states other then LISTEN
3755 * but no need to allocate the first subflow just to error out.
3756 */
3766 * but no need to allocate the first subflow just to error out.
3767 */
3757 ssock = msk->subflow;
3768 ssock = READ_ONCE(msk->subflow);
3758 if (!ssock)
3759 return -EINVAL;
3760
3769 if (!ssock)
3770 return -EINVAL;
3771
3761 err = ssock->ops->accept(sock, newsock, flags, kern);
3762 if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
3763 struct mptcp_sock *msk = mptcp_sk(newsock->sk);
3772 newsk = mptcp_accept(sock->sk, flags, &err, kern);
3773 if (!newsk)
3774 return err;
3775
3776 lock_sock(newsk);
3777
3778 __inet_accept(sock, newsock, newsk);
3779 if (!mptcp_is_tcpsk(newsock->sk)) {
3780 struct mptcp_sock *msk = mptcp_sk(newsk);
3764 struct mptcp_subflow_context *subflow;
3781 struct mptcp_subflow_context *subflow;
3765 struct sock *newsk = newsock->sk;
3766
3767 set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
3768 msk->in_accept_queue = 0;
3769
3782
3783 set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
3784 msk->in_accept_queue = 0;
3785
3770 lock_sock(newsk);
3771
3772 /* set ssk->sk_socket of accept()ed flows to mptcp socket.
3773 * This is needed so NOSPACE flag can be set from tcp stack.
3774 */
3775 mptcp_for_each_subflow(msk, subflow) {
3776 struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
3777
3778 if (!ssk->sk_socket)
3779 mptcp_sock_graft(ssk, newsock);

--- 4 unchanged lines hidden (view full) ---

3784 */
3785 if (msk->first &&
3786 unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
3787 __mptcp_close_ssk(newsk, msk->first,
3788 mptcp_subflow_ctx(msk->first), 0);
3789 if (unlikely(list_empty(&msk->conn_list)))
3790 inet_sk_state_store(newsk, TCP_CLOSE);
3791 }
3786 /* set ssk->sk_socket of accept()ed flows to mptcp socket.
3787 * This is needed so NOSPACE flag can be set from tcp stack.
3788 */
3789 mptcp_for_each_subflow(msk, subflow) {
3790 struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
3791
3792 if (!ssk->sk_socket)
3793 mptcp_sock_graft(ssk, newsock);

--- 4 unchanged lines hidden (view full) ---

3798 */
3799 if (msk->first &&
3800 unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
3801 __mptcp_close_ssk(newsk, msk->first,
3802 mptcp_subflow_ctx(msk->first), 0);
3803 if (unlikely(list_empty(&msk->conn_list)))
3804 inet_sk_state_store(newsk, TCP_CLOSE);
3805 }
3792
3793 release_sock(newsk);
3794 }
3806 }
3807 release_sock(newsk);
3795
3808
3796 return err;
3809 return 0;
3797}
3798
3799static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
3800{
3801 struct sock *sk = (struct sock *)msk;
3802
3810}
3811
3812static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
3813{
3814 struct sock *sk = (struct sock *)msk;
3815
3803 if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
3804 return EPOLLOUT | EPOLLWRNORM;
3805
3806 if (sk_stream_is_writeable(sk))
3807 return EPOLLOUT | EPOLLWRNORM;
3808
3809 mptcp_set_nospace(sk);
3810 smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
3811 if (sk_stream_is_writeable(sk))
3812 return EPOLLOUT | EPOLLWRNORM;
3813
3814 return 0;
3815}
3816
3817static __poll_t mptcp_poll(struct file *file, struct socket *sock,
3818 struct poll_table_struct *wait)
3819{
3820 struct sock *sk = sock->sk;
3821 struct mptcp_sock *msk;
3822 __poll_t mask = 0;
3816 if (sk_stream_is_writeable(sk))
3817 return EPOLLOUT | EPOLLWRNORM;
3818
3819 mptcp_set_nospace(sk);
3820 smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
3821 if (sk_stream_is_writeable(sk))
3822 return EPOLLOUT | EPOLLWRNORM;
3823
3824 return 0;
3825}
3826
3827static __poll_t mptcp_poll(struct file *file, struct socket *sock,
3828 struct poll_table_struct *wait)
3829{
3830 struct sock *sk = sock->sk;
3831 struct mptcp_sock *msk;
3832 __poll_t mask = 0;
3833 u8 shutdown;
3823 int state;
3824
3825 msk = mptcp_sk(sk);
3826 sock_poll_wait(file, sock, wait);
3827
3828 state = inet_sk_state_load(sk);
3829 pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
3830 if (state == TCP_LISTEN) {
3834 int state;
3835
3836 msk = mptcp_sk(sk);
3837 sock_poll_wait(file, sock, wait);
3838
3839 state = inet_sk_state_load(sk);
3840 pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
3841 if (state == TCP_LISTEN) {
3831 if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
3842 struct socket *ssock = READ_ONCE(msk->subflow);
3843
3844 if (WARN_ON_ONCE(!ssock || !ssock->sk))
3832 return 0;
3833
3845 return 0;
3846
3834 return inet_csk_listen_poll(msk->subflow->sk);
3847 return inet_csk_listen_poll(ssock->sk);
3835 }
3836
3848 }
3849
3850 shutdown = READ_ONCE(sk->sk_shutdown);
3851 if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
3852 mask |= EPOLLHUP;
3853 if (shutdown & RCV_SHUTDOWN)
3854 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
3855
3837 if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
3838 mask |= mptcp_check_readable(msk);
3856 if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
3857 mask |= mptcp_check_readable(msk);
3839 mask |= mptcp_check_writeable(msk);
3858 if (shutdown & SEND_SHUTDOWN)
3859 mask |= EPOLLOUT | EPOLLWRNORM;
3860 else
3861 mask |= mptcp_check_writeable(msk);
3840 } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
3841 /* cf tcp_poll() note about TFO */
3842 mask |= EPOLLOUT | EPOLLWRNORM;
3843 }
3862 } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
3863 /* cf tcp_poll() note about TFO */
3864 mask |= EPOLLOUT | EPOLLWRNORM;
3865 }
3844 if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
3845 mask |= EPOLLHUP;
3846 if (sk->sk_shutdown & RCV_SHUTDOWN)
3847 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
3848
3849 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
3850 smp_rmb();
3851 if (READ_ONCE(sk->sk_err))
3852 mask |= EPOLLERR;
3853
3854 return mask;
3855}
3856
3857static const struct proto_ops mptcp_stream_ops = {
3858 .family = PF_INET,
3859 .owner = THIS_MODULE,
3860 .release = inet_release,
3861 .bind = mptcp_bind,
3866
3867 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
3868 smp_rmb();
3869 if (READ_ONCE(sk->sk_err))
3870 mask |= EPOLLERR;
3871
3872 return mask;
3873}
3874
3875static const struct proto_ops mptcp_stream_ops = {
3876 .family = PF_INET,
3877 .owner = THIS_MODULE,
3878 .release = inet_release,
3879 .bind = mptcp_bind,
3862 .connect = mptcp_stream_connect,
3880 .connect = inet_stream_connect,
3863 .socketpair = sock_no_socketpair,
3864 .accept = mptcp_stream_accept,
3865 .getname = inet_getname,
3866 .poll = mptcp_poll,
3867 .ioctl = inet_ioctl,
3868 .gettstamp = sock_gettstamp,
3869 .listen = mptcp_listen,
3870 .shutdown = inet_shutdown,

--- 78 unchanged lines hidden (view full) ---

3949}
3950
3951#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3952static const struct proto_ops mptcp_v6_stream_ops = {
3953 .family = PF_INET6,
3954 .owner = THIS_MODULE,
3955 .release = inet6_release,
3956 .bind = mptcp_bind,
3881 .socketpair = sock_no_socketpair,
3882 .accept = mptcp_stream_accept,
3883 .getname = inet_getname,
3884 .poll = mptcp_poll,
3885 .ioctl = inet_ioctl,
3886 .gettstamp = sock_gettstamp,
3887 .listen = mptcp_listen,
3888 .shutdown = inet_shutdown,

--- 78 unchanged lines hidden (view full) ---

3967}
3968
3969#if IS_ENABLED(CONFIG_MPTCP_IPV6)
3970static const struct proto_ops mptcp_v6_stream_ops = {
3971 .family = PF_INET6,
3972 .owner = THIS_MODULE,
3973 .release = inet6_release,
3974 .bind = mptcp_bind,
3957 .connect = mptcp_stream_connect,
3975 .connect = inet_stream_connect,
3958 .socketpair = sock_no_socketpair,
3959 .accept = mptcp_stream_accept,
3960 .getname = inet6_getname,
3961 .poll = mptcp_poll,
3962 .ioctl = inet6_ioctl,
3963 .gettstamp = sock_gettstamp,
3964 .listen = mptcp_listen,
3965 .shutdown = inet_shutdown,

--- 41 unchanged lines hidden ---
3976 .socketpair = sock_no_socketpair,
3977 .accept = mptcp_stream_accept,
3978 .getname = inet6_getname,
3979 .poll = mptcp_poll,
3980 .ioctl = inet6_ioctl,
3981 .gettstamp = sock_gettstamp,
3982 .listen = mptcp_listen,
3983 .shutdown = inet_shutdown,

--- 41 unchanged lines hidden ---