1*d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 29b73896aSTom Herbert /* 39b73896aSTom Herbert * Kernel Connection Multiplexor 49b73896aSTom Herbert * 59b73896aSTom Herbert * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> 69b73896aSTom Herbert */ 79b73896aSTom Herbert 8ab7ac4ebSTom Herbert #include <linux/bpf.h> 9ab7ac4ebSTom Herbert #include <linux/errno.h> 10ab7ac4ebSTom Herbert #include <linux/errqueue.h> 11ab7ac4ebSTom Herbert #include <linux/file.h> 12ab7ac4ebSTom Herbert #include <linux/in.h> 13ab7ac4ebSTom Herbert #include <linux/kernel.h> 14ab7ac4ebSTom Herbert #include <linux/module.h> 15ab7ac4ebSTom Herbert #include <linux/net.h> 16ab7ac4ebSTom Herbert #include <linux/netdevice.h> 17ab7ac4ebSTom Herbert #include <linux/poll.h> 18ab7ac4ebSTom Herbert #include <linux/rculist.h> 19ab7ac4ebSTom Herbert #include <linux/skbuff.h> 20ab7ac4ebSTom Herbert #include <linux/socket.h> 21ab7ac4ebSTom Herbert #include <linux/uaccess.h> 22ab7ac4ebSTom Herbert #include <linux/workqueue.h> 23c0338affSWANG Cong #include <linux/syscalls.h> 24174cd4b1SIngo Molnar #include <linux/sched/signal.h> 25174cd4b1SIngo Molnar 26ab7ac4ebSTom Herbert #include <net/kcm.h> 27ab7ac4ebSTom Herbert #include <net/netns/generic.h> 28ab7ac4ebSTom Herbert #include <net/sock.h> 29ab7ac4ebSTom Herbert #include <uapi/linux/kcm.h> 30ab7ac4ebSTom Herbert 31ab7ac4ebSTom Herbert unsigned int kcm_net_id; 32ab7ac4ebSTom Herbert 33ab7ac4ebSTom Herbert static struct kmem_cache *kcm_psockp __read_mostly; 34ab7ac4ebSTom Herbert static struct kmem_cache *kcm_muxp __read_mostly; 35ab7ac4ebSTom Herbert static struct workqueue_struct *kcm_wq; 36ab7ac4ebSTom Herbert 37ab7ac4ebSTom Herbert static inline struct kcm_sock *kcm_sk(const struct sock *sk) 38ab7ac4ebSTom Herbert { 39ab7ac4ebSTom Herbert return (struct kcm_sock *)sk; 40ab7ac4ebSTom Herbert } 41ab7ac4ebSTom Herbert 42ab7ac4ebSTom Herbert static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) 43ab7ac4ebSTom Herbert { 44ab7ac4ebSTom Herbert return (struct kcm_tx_msg *)skb->cb; 45ab7ac4ebSTom Herbert } 46ab7ac4ebSTom Herbert 47ab7ac4ebSTom Herbert static void report_csk_error(struct sock *csk, int err) 48ab7ac4ebSTom Herbert { 49ab7ac4ebSTom Herbert csk->sk_err = EPIPE; 50ab7ac4ebSTom Herbert csk->sk_error_report(csk); 51ab7ac4ebSTom Herbert } 52ab7ac4ebSTom Herbert 53ab7ac4ebSTom Herbert static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, 54ab7ac4ebSTom Herbert bool wakeup_kcm) 55ab7ac4ebSTom Herbert { 56ab7ac4ebSTom Herbert struct sock *csk = psock->sk; 57ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 58ab7ac4ebSTom Herbert 59ab7ac4ebSTom Herbert /* Unrecoverable error in transmit */ 60ab7ac4ebSTom Herbert 61ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 62ab7ac4ebSTom Herbert 63ab7ac4ebSTom Herbert if (psock->tx_stopped) { 64ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 65ab7ac4ebSTom Herbert return; 66ab7ac4ebSTom Herbert } 67ab7ac4ebSTom Herbert 68ab7ac4ebSTom Herbert psock->tx_stopped = 1; 69cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.tx_aborts); 70ab7ac4ebSTom Herbert 71ab7ac4ebSTom Herbert if (!psock->tx_kcm) { 72ab7ac4ebSTom Herbert /* Take off psocks_avail list */ 73ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 74ab7ac4ebSTom Herbert } else if (wakeup_kcm) { 75ab7ac4ebSTom Herbert /* In this case psock is being aborted while outside of 76ab7ac4ebSTom Herbert * write_msgs and psock is reserved. Schedule tx_work 77ab7ac4ebSTom Herbert * to handle the failure there. Need to commit tx_stopped 78ab7ac4ebSTom Herbert * before queuing work. 79ab7ac4ebSTom Herbert */ 80ab7ac4ebSTom Herbert smp_mb(); 81ab7ac4ebSTom Herbert 82ab7ac4ebSTom Herbert queue_work(kcm_wq, &psock->tx_kcm->tx_work); 83ab7ac4ebSTom Herbert } 84ab7ac4ebSTom Herbert 85ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 86ab7ac4ebSTom Herbert 87ab7ac4ebSTom Herbert /* Report error on lower socket */ 88ab7ac4ebSTom Herbert report_csk_error(csk, err); 89ab7ac4ebSTom Herbert } 90ab7ac4ebSTom Herbert 91cd6e111bSTom Herbert /* RX mux lock held. */ 92cd6e111bSTom Herbert static void kcm_update_rx_mux_stats(struct kcm_mux *mux, 93cd6e111bSTom Herbert struct kcm_psock *psock) 94cd6e111bSTom Herbert { 959b73896aSTom Herbert STRP_STATS_ADD(mux->stats.rx_bytes, 96bbb03029STom Herbert psock->strp.stats.bytes - 979b73896aSTom Herbert psock->saved_rx_bytes); 98cd6e111bSTom Herbert mux->stats.rx_msgs += 99bbb03029STom Herbert psock->strp.stats.msgs - psock->saved_rx_msgs; 100bbb03029STom Herbert psock->saved_rx_msgs = psock->strp.stats.msgs; 101bbb03029STom Herbert psock->saved_rx_bytes = psock->strp.stats.bytes; 102cd6e111bSTom Herbert } 103cd6e111bSTom Herbert 104cd6e111bSTom Herbert static void kcm_update_tx_mux_stats(struct kcm_mux *mux, 105cd6e111bSTom Herbert struct kcm_psock *psock) 106cd6e111bSTom Herbert { 107cd6e111bSTom Herbert KCM_STATS_ADD(mux->stats.tx_bytes, 108cd6e111bSTom Herbert psock->stats.tx_bytes - psock->saved_tx_bytes); 109cd6e111bSTom Herbert mux->stats.tx_msgs += 110cd6e111bSTom Herbert psock->stats.tx_msgs - psock->saved_tx_msgs; 111cd6e111bSTom Herbert psock->saved_tx_msgs = psock->stats.tx_msgs; 112cd6e111bSTom Herbert psock->saved_tx_bytes = psock->stats.tx_bytes; 113cd6e111bSTom Herbert } 114cd6e111bSTom Herbert 115ab7ac4ebSTom Herbert static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); 116ab7ac4ebSTom Herbert 117ab7ac4ebSTom Herbert /* KCM is ready to receive messages on its queue-- either the KCM is new or 118ab7ac4ebSTom Herbert * has become unblocked after being blocked on full socket buffer. Queue any 119ab7ac4ebSTom Herbert * pending ready messages on a psock. RX mux lock held. 120ab7ac4ebSTom Herbert */ 121ab7ac4ebSTom Herbert static void kcm_rcv_ready(struct kcm_sock *kcm) 122ab7ac4ebSTom Herbert { 123ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 124ab7ac4ebSTom Herbert struct kcm_psock *psock; 125ab7ac4ebSTom Herbert struct sk_buff *skb; 126ab7ac4ebSTom Herbert 127ab7ac4ebSTom Herbert if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) 128ab7ac4ebSTom Herbert return; 129ab7ac4ebSTom Herbert 130ab7ac4ebSTom Herbert while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { 131ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 132ab7ac4ebSTom Herbert /* Assuming buffer limit has been reached */ 133ab7ac4ebSTom Herbert skb_queue_head(&mux->rx_hold_queue, skb); 134ab7ac4ebSTom Herbert WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); 135ab7ac4ebSTom Herbert return; 136ab7ac4ebSTom Herbert } 137ab7ac4ebSTom Herbert } 138ab7ac4ebSTom Herbert 139ab7ac4ebSTom Herbert while (!list_empty(&mux->psocks_ready)) { 140ab7ac4ebSTom Herbert psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, 141ab7ac4ebSTom Herbert psock_ready_list); 142ab7ac4ebSTom Herbert 143ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { 144ab7ac4ebSTom Herbert /* Assuming buffer limit has been reached */ 145ab7ac4ebSTom Herbert WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); 146ab7ac4ebSTom Herbert return; 147ab7ac4ebSTom Herbert } 148ab7ac4ebSTom Herbert 149ab7ac4ebSTom Herbert /* Consumed the ready message on the psock. Schedule rx_work to 150ab7ac4ebSTom Herbert * get more messages. 151ab7ac4ebSTom Herbert */ 152ab7ac4ebSTom Herbert list_del(&psock->psock_ready_list); 153ab7ac4ebSTom Herbert psock->ready_rx_msg = NULL; 154ab7ac4ebSTom Herbert /* Commit clearing of ready_rx_msg for queuing work */ 155ab7ac4ebSTom Herbert smp_mb(); 156ab7ac4ebSTom Herbert 1579b73896aSTom Herbert strp_unpause(&psock->strp); 1589b73896aSTom Herbert strp_check_rcv(&psock->strp); 159ab7ac4ebSTom Herbert } 160ab7ac4ebSTom Herbert 161ab7ac4ebSTom Herbert /* Buffer limit is okay now, add to ready list */ 162ab7ac4ebSTom Herbert list_add_tail(&kcm->wait_rx_list, 163ab7ac4ebSTom Herbert &kcm->mux->kcm_rx_waiters); 164ab7ac4ebSTom Herbert kcm->rx_wait = true; 165ab7ac4ebSTom Herbert } 166ab7ac4ebSTom Herbert 167ab7ac4ebSTom Herbert static void kcm_rfree(struct sk_buff *skb) 168ab7ac4ebSTom Herbert { 169ab7ac4ebSTom Herbert struct sock *sk = skb->sk; 170ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 171ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 172ab7ac4ebSTom Herbert unsigned int len = skb->truesize; 173ab7ac4ebSTom Herbert 174ab7ac4ebSTom Herbert sk_mem_uncharge(sk, len); 175ab7ac4ebSTom Herbert atomic_sub(len, &sk->sk_rmem_alloc); 176ab7ac4ebSTom Herbert 177ab7ac4ebSTom Herbert /* For reading rx_wait and rx_psock without holding lock */ 178ab7ac4ebSTom Herbert smp_mb__after_atomic(); 179ab7ac4ebSTom Herbert 180ab7ac4ebSTom Herbert if (!kcm->rx_wait && !kcm->rx_psock && 181ab7ac4ebSTom Herbert sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { 182ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 183ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 184ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 185ab7ac4ebSTom Herbert } 186ab7ac4ebSTom Herbert } 187ab7ac4ebSTom Herbert 188ab7ac4ebSTom Herbert static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 189ab7ac4ebSTom Herbert { 190ab7ac4ebSTom Herbert struct sk_buff_head *list = &sk->sk_receive_queue; 191ab7ac4ebSTom Herbert 192ab7ac4ebSTom Herbert if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 193ab7ac4ebSTom Herbert return -ENOMEM; 194ab7ac4ebSTom Herbert 195ab7ac4ebSTom Herbert if (!sk_rmem_schedule(sk, skb, skb->truesize)) 196ab7ac4ebSTom Herbert return -ENOBUFS; 197ab7ac4ebSTom Herbert 198ab7ac4ebSTom Herbert skb->dev = NULL; 199ab7ac4ebSTom Herbert 200ab7ac4ebSTom Herbert skb_orphan(skb); 201ab7ac4ebSTom Herbert skb->sk = sk; 202ab7ac4ebSTom Herbert skb->destructor = kcm_rfree; 203ab7ac4ebSTom Herbert atomic_add(skb->truesize, &sk->sk_rmem_alloc); 204ab7ac4ebSTom Herbert sk_mem_charge(sk, skb->truesize); 205ab7ac4ebSTom Herbert 206ab7ac4ebSTom Herbert skb_queue_tail(list, skb); 207ab7ac4ebSTom Herbert 208ab7ac4ebSTom Herbert if (!sock_flag(sk, SOCK_DEAD)) 209ab7ac4ebSTom Herbert sk->sk_data_ready(sk); 210ab7ac4ebSTom Herbert 211ab7ac4ebSTom Herbert return 0; 212ab7ac4ebSTom Herbert } 213ab7ac4ebSTom Herbert 214ab7ac4ebSTom Herbert /* Requeue received messages for a kcm socket to other kcm sockets. This is 215ab7ac4ebSTom Herbert * called with a kcm socket is receive disabled. 216ab7ac4ebSTom Herbert * RX mux lock held. 217ab7ac4ebSTom Herbert */ 218ab7ac4ebSTom Herbert static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) 219ab7ac4ebSTom Herbert { 220ab7ac4ebSTom Herbert struct sk_buff *skb; 221ab7ac4ebSTom Herbert struct kcm_sock *kcm; 222ab7ac4ebSTom Herbert 223ab7ac4ebSTom Herbert while ((skb = __skb_dequeue(head))) { 224ab7ac4ebSTom Herbert /* Reset destructor to avoid calling kcm_rcv_ready */ 225ab7ac4ebSTom Herbert skb->destructor = sock_rfree; 226ab7ac4ebSTom Herbert skb_orphan(skb); 227ab7ac4ebSTom Herbert try_again: 228ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_rx_waiters)) { 229ab7ac4ebSTom Herbert skb_queue_tail(&mux->rx_hold_queue, skb); 230ab7ac4ebSTom Herbert continue; 231ab7ac4ebSTom Herbert } 232ab7ac4ebSTom Herbert 233ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_rx_waiters, 234ab7ac4ebSTom Herbert struct kcm_sock, wait_rx_list); 235ab7ac4ebSTom Herbert 236ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 237ab7ac4ebSTom Herbert /* Should mean socket buffer full */ 238ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 239ab7ac4ebSTom Herbert kcm->rx_wait = false; 240ab7ac4ebSTom Herbert 241ab7ac4ebSTom Herbert /* Commit rx_wait to read in kcm_free */ 242ab7ac4ebSTom Herbert smp_wmb(); 243ab7ac4ebSTom Herbert 244ab7ac4ebSTom Herbert goto try_again; 245ab7ac4ebSTom Herbert } 246ab7ac4ebSTom Herbert } 247ab7ac4ebSTom Herbert } 248ab7ac4ebSTom Herbert 249ab7ac4ebSTom Herbert /* Lower sock lock held */ 250ab7ac4ebSTom Herbert static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, 251ab7ac4ebSTom Herbert struct sk_buff *head) 252ab7ac4ebSTom Herbert { 253ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 254ab7ac4ebSTom Herbert struct kcm_sock *kcm; 255ab7ac4ebSTom Herbert 256ab7ac4ebSTom Herbert WARN_ON(psock->ready_rx_msg); 257ab7ac4ebSTom Herbert 258ab7ac4ebSTom Herbert if (psock->rx_kcm) 259ab7ac4ebSTom Herbert return psock->rx_kcm; 260ab7ac4ebSTom Herbert 261ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 262ab7ac4ebSTom Herbert 263ab7ac4ebSTom Herbert if (psock->rx_kcm) { 264ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 265ab7ac4ebSTom Herbert return psock->rx_kcm; 266ab7ac4ebSTom Herbert } 267ab7ac4ebSTom Herbert 268cd6e111bSTom Herbert kcm_update_rx_mux_stats(mux, psock); 269cd6e111bSTom Herbert 270ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_rx_waiters)) { 271ab7ac4ebSTom Herbert psock->ready_rx_msg = head; 2729b73896aSTom Herbert strp_pause(&psock->strp); 273ab7ac4ebSTom Herbert list_add_tail(&psock->psock_ready_list, 274ab7ac4ebSTom Herbert &mux->psocks_ready); 275ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 276ab7ac4ebSTom Herbert return NULL; 277ab7ac4ebSTom Herbert } 278ab7ac4ebSTom Herbert 279ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_rx_waiters, 280ab7ac4ebSTom Herbert struct kcm_sock, wait_rx_list); 281ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 282ab7ac4ebSTom Herbert kcm->rx_wait = false; 283ab7ac4ebSTom Herbert 284ab7ac4ebSTom Herbert psock->rx_kcm = kcm; 285ab7ac4ebSTom Herbert kcm->rx_psock = psock; 286ab7ac4ebSTom Herbert 287ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 288ab7ac4ebSTom Herbert 289ab7ac4ebSTom Herbert return kcm; 290ab7ac4ebSTom Herbert } 291ab7ac4ebSTom Herbert 292ab7ac4ebSTom Herbert static void kcm_done(struct kcm_sock *kcm); 293ab7ac4ebSTom Herbert 294ab7ac4ebSTom Herbert static void kcm_done_work(struct work_struct *w) 295ab7ac4ebSTom Herbert { 296ab7ac4ebSTom Herbert kcm_done(container_of(w, struct kcm_sock, done_work)); 297ab7ac4ebSTom Herbert } 298ab7ac4ebSTom Herbert 299ab7ac4ebSTom Herbert /* Lower sock held */ 300ab7ac4ebSTom Herbert static void unreserve_rx_kcm(struct kcm_psock *psock, 301ab7ac4ebSTom Herbert bool rcv_ready) 302ab7ac4ebSTom Herbert { 303ab7ac4ebSTom Herbert struct kcm_sock *kcm = psock->rx_kcm; 304ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 305ab7ac4ebSTom Herbert 306ab7ac4ebSTom Herbert if (!kcm) 307ab7ac4ebSTom Herbert return; 308ab7ac4ebSTom Herbert 309ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 310ab7ac4ebSTom Herbert 311ab7ac4ebSTom Herbert psock->rx_kcm = NULL; 312ab7ac4ebSTom Herbert kcm->rx_psock = NULL; 313ab7ac4ebSTom Herbert 314ab7ac4ebSTom Herbert /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with 315ab7ac4ebSTom Herbert * kcm_rfree 316ab7ac4ebSTom Herbert */ 317ab7ac4ebSTom Herbert smp_mb(); 318ab7ac4ebSTom Herbert 319ab7ac4ebSTom Herbert if (unlikely(kcm->done)) { 320ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 321ab7ac4ebSTom Herbert 322ab7ac4ebSTom Herbert /* Need to run kcm_done in a task since we need to qcquire 323ab7ac4ebSTom Herbert * callback locks which may already be held here. 324ab7ac4ebSTom Herbert */ 325ab7ac4ebSTom Herbert INIT_WORK(&kcm->done_work, kcm_done_work); 326ab7ac4ebSTom Herbert schedule_work(&kcm->done_work); 327ab7ac4ebSTom Herbert return; 328ab7ac4ebSTom Herbert } 329ab7ac4ebSTom Herbert 330ab7ac4ebSTom Herbert if (unlikely(kcm->rx_disabled)) { 331ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); 332ab7ac4ebSTom Herbert } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { 333ab7ac4ebSTom Herbert /* Check for degenerative race with rx_wait that all 334ab7ac4ebSTom Herbert * data was dequeued (accounted for in kcm_rfree). 335ab7ac4ebSTom Herbert */ 336ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 337ab7ac4ebSTom Herbert } 338ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 339ab7ac4ebSTom Herbert } 340ab7ac4ebSTom Herbert 341ab7ac4ebSTom Herbert /* Lower sock lock held */ 34296a59083STom Herbert static void psock_data_ready(struct sock *sk) 343ab7ac4ebSTom Herbert { 344ab7ac4ebSTom Herbert struct kcm_psock *psock; 345ab7ac4ebSTom Herbert 346ab7ac4ebSTom Herbert read_lock_bh(&sk->sk_callback_lock); 347ab7ac4ebSTom Herbert 348ab7ac4ebSTom Herbert psock = (struct kcm_psock *)sk->sk_user_data; 3499b73896aSTom Herbert if (likely(psock)) 35096a59083STom Herbert strp_data_ready(&psock->strp); 351ab7ac4ebSTom Herbert 352ab7ac4ebSTom Herbert read_unlock_bh(&sk->sk_callback_lock); 353ab7ac4ebSTom Herbert } 354ab7ac4ebSTom Herbert 3559b73896aSTom Herbert /* Called with lower sock held */ 3569b73896aSTom Herbert static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) 357ab7ac4ebSTom Herbert { 3589b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3599b73896aSTom Herbert struct kcm_sock *kcm; 360ab7ac4ebSTom Herbert 3619b73896aSTom Herbert try_queue: 3629b73896aSTom Herbert kcm = reserve_rx_kcm(psock, skb); 3639b73896aSTom Herbert if (!kcm) { 3649b73896aSTom Herbert /* Unable to reserve a KCM, message is held in psock and strp 3659b73896aSTom Herbert * is paused. 366ab7ac4ebSTom Herbert */ 3679b73896aSTom Herbert return; 368ab7ac4ebSTom Herbert } 369ab7ac4ebSTom Herbert 3709b73896aSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 3719b73896aSTom Herbert /* Should mean socket buffer full */ 3729b73896aSTom Herbert unreserve_rx_kcm(psock, false); 3739b73896aSTom Herbert goto try_queue; 3749b73896aSTom Herbert } 375ab7ac4ebSTom Herbert } 376ab7ac4ebSTom Herbert 3779b73896aSTom Herbert static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) 378ab7ac4ebSTom Herbert { 3799b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3809b73896aSTom Herbert struct bpf_prog *prog = psock->bpf_prog; 3819b73896aSTom Herbert 3829b73896aSTom Herbert return (*prog->bpf_func)(skb, prog->insnsi); 3839b73896aSTom Herbert } 3849b73896aSTom Herbert 3859b73896aSTom Herbert static int kcm_read_sock_done(struct strparser *strp, int err) 3869b73896aSTom Herbert { 3879b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3889b73896aSTom Herbert 3899b73896aSTom Herbert unreserve_rx_kcm(psock, true); 3909b73896aSTom Herbert 3919b73896aSTom Herbert return err; 392ab7ac4ebSTom Herbert } 393ab7ac4ebSTom Herbert 39496a59083STom Herbert static void psock_state_change(struct sock *sk) 395ab7ac4ebSTom Herbert { 396a9a08845SLinus Torvalds /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here 397a9a08845SLinus Torvalds * since application will normally not poll with EPOLLIN 398ab7ac4ebSTom Herbert * on the TCP sockets. 399ab7ac4ebSTom Herbert */ 400ab7ac4ebSTom Herbert 401ab7ac4ebSTom Herbert report_csk_error(sk, EPIPE); 402ab7ac4ebSTom Herbert } 403ab7ac4ebSTom Herbert 40496a59083STom Herbert static void psock_write_space(struct sock *sk) 405ab7ac4ebSTom Herbert { 406ab7ac4ebSTom Herbert struct kcm_psock *psock; 407ab7ac4ebSTom Herbert struct kcm_mux *mux; 408ab7ac4ebSTom Herbert struct kcm_sock *kcm; 409ab7ac4ebSTom Herbert 410ab7ac4ebSTom Herbert read_lock_bh(&sk->sk_callback_lock); 411ab7ac4ebSTom Herbert 412ab7ac4ebSTom Herbert psock = (struct kcm_psock *)sk->sk_user_data; 413ab7ac4ebSTom Herbert if (unlikely(!psock)) 414ab7ac4ebSTom Herbert goto out; 415ab7ac4ebSTom Herbert mux = psock->mux; 416ab7ac4ebSTom Herbert 417ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 418ab7ac4ebSTom Herbert 419ab7ac4ebSTom Herbert /* Check if the socket is reserved so someone is waiting for sending. */ 420ab7ac4ebSTom Herbert kcm = psock->tx_kcm; 4219b73896aSTom Herbert if (kcm && !unlikely(kcm->tx_stopped)) 422ab7ac4ebSTom Herbert queue_work(kcm_wq, &kcm->tx_work); 423ab7ac4ebSTom Herbert 424ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 425ab7ac4ebSTom Herbert out: 426ab7ac4ebSTom Herbert read_unlock_bh(&sk->sk_callback_lock); 427ab7ac4ebSTom Herbert } 428ab7ac4ebSTom Herbert 429ab7ac4ebSTom Herbert static void unreserve_psock(struct kcm_sock *kcm); 430ab7ac4ebSTom Herbert 431ab7ac4ebSTom Herbert /* kcm sock is locked. */ 432ab7ac4ebSTom Herbert static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) 433ab7ac4ebSTom Herbert { 434ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 435ab7ac4ebSTom Herbert struct kcm_psock *psock; 436ab7ac4ebSTom Herbert 437ab7ac4ebSTom Herbert psock = kcm->tx_psock; 438ab7ac4ebSTom Herbert 439ab7ac4ebSTom Herbert smp_rmb(); /* Must read tx_psock before tx_wait */ 440ab7ac4ebSTom Herbert 441ab7ac4ebSTom Herbert if (psock) { 442ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 443ab7ac4ebSTom Herbert if (unlikely(psock->tx_stopped)) 444ab7ac4ebSTom Herbert unreserve_psock(kcm); 445ab7ac4ebSTom Herbert else 446ab7ac4ebSTom Herbert return kcm->tx_psock; 447ab7ac4ebSTom Herbert } 448ab7ac4ebSTom Herbert 449ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 450ab7ac4ebSTom Herbert 451ab7ac4ebSTom Herbert /* Check again under lock to see if psock was reserved for this 452ab7ac4ebSTom Herbert * psock via psock_unreserve. 453ab7ac4ebSTom Herbert */ 454ab7ac4ebSTom Herbert psock = kcm->tx_psock; 455ab7ac4ebSTom Herbert if (unlikely(psock)) { 456ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 457ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 458ab7ac4ebSTom Herbert return kcm->tx_psock; 459ab7ac4ebSTom Herbert } 460ab7ac4ebSTom Herbert 461ab7ac4ebSTom Herbert if (!list_empty(&mux->psocks_avail)) { 462ab7ac4ebSTom Herbert psock = list_first_entry(&mux->psocks_avail, 463ab7ac4ebSTom Herbert struct kcm_psock, 464ab7ac4ebSTom Herbert psock_avail_list); 465ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 466ab7ac4ebSTom Herbert if (kcm->tx_wait) { 467ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 468ab7ac4ebSTom Herbert kcm->tx_wait = false; 469ab7ac4ebSTom Herbert } 470ab7ac4ebSTom Herbert kcm->tx_psock = psock; 471ab7ac4ebSTom Herbert psock->tx_kcm = kcm; 472cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.reserved); 473ab7ac4ebSTom Herbert } else if (!kcm->tx_wait) { 474ab7ac4ebSTom Herbert list_add_tail(&kcm->wait_psock_list, 475ab7ac4ebSTom Herbert &mux->kcm_tx_waiters); 476ab7ac4ebSTom Herbert kcm->tx_wait = true; 477ab7ac4ebSTom Herbert } 478ab7ac4ebSTom Herbert 479ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 480ab7ac4ebSTom Herbert 481ab7ac4ebSTom Herbert return psock; 482ab7ac4ebSTom Herbert } 483ab7ac4ebSTom Herbert 484ab7ac4ebSTom Herbert /* mux lock held */ 485ab7ac4ebSTom Herbert static void psock_now_avail(struct kcm_psock *psock) 486ab7ac4ebSTom Herbert { 487ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 488ab7ac4ebSTom Herbert struct kcm_sock *kcm; 489ab7ac4ebSTom Herbert 490ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_tx_waiters)) { 491ab7ac4ebSTom Herbert list_add_tail(&psock->psock_avail_list, 492ab7ac4ebSTom Herbert &mux->psocks_avail); 493ab7ac4ebSTom Herbert } else { 494ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_tx_waiters, 495ab7ac4ebSTom Herbert struct kcm_sock, 496ab7ac4ebSTom Herbert wait_psock_list); 497ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 498ab7ac4ebSTom Herbert kcm->tx_wait = false; 499ab7ac4ebSTom Herbert psock->tx_kcm = kcm; 500ab7ac4ebSTom Herbert 501ab7ac4ebSTom Herbert /* Commit before changing tx_psock since that is read in 502ab7ac4ebSTom Herbert * reserve_psock before queuing work. 503ab7ac4ebSTom Herbert */ 504ab7ac4ebSTom Herbert smp_mb(); 505ab7ac4ebSTom Herbert 506ab7ac4ebSTom Herbert kcm->tx_psock = psock; 507cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.reserved); 508ab7ac4ebSTom Herbert queue_work(kcm_wq, &kcm->tx_work); 509ab7ac4ebSTom Herbert } 510ab7ac4ebSTom Herbert } 511ab7ac4ebSTom Herbert 512ab7ac4ebSTom Herbert /* kcm sock is locked. */ 513ab7ac4ebSTom Herbert static void unreserve_psock(struct kcm_sock *kcm) 514ab7ac4ebSTom Herbert { 515ab7ac4ebSTom Herbert struct kcm_psock *psock; 516ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 517ab7ac4ebSTom Herbert 518ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 519ab7ac4ebSTom Herbert 520ab7ac4ebSTom Herbert psock = kcm->tx_psock; 521ab7ac4ebSTom Herbert 522ab7ac4ebSTom Herbert if (WARN_ON(!psock)) { 523ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 524ab7ac4ebSTom Herbert return; 525ab7ac4ebSTom Herbert } 526ab7ac4ebSTom Herbert 527ab7ac4ebSTom Herbert smp_rmb(); /* Read tx_psock before tx_wait */ 528ab7ac4ebSTom Herbert 529cd6e111bSTom Herbert kcm_update_tx_mux_stats(mux, psock); 530cd6e111bSTom Herbert 531ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 532ab7ac4ebSTom Herbert 533ab7ac4ebSTom Herbert kcm->tx_psock = NULL; 534ab7ac4ebSTom Herbert psock->tx_kcm = NULL; 535cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.unreserved); 536ab7ac4ebSTom Herbert 537ab7ac4ebSTom Herbert if (unlikely(psock->tx_stopped)) { 538ab7ac4ebSTom Herbert if (psock->done) { 539ab7ac4ebSTom Herbert /* Deferred free */ 540ab7ac4ebSTom Herbert list_del(&psock->psock_list); 541ab7ac4ebSTom Herbert mux->psocks_cnt--; 542ab7ac4ebSTom Herbert sock_put(psock->sk); 543ab7ac4ebSTom Herbert fput(psock->sk->sk_socket->file); 544ab7ac4ebSTom Herbert kmem_cache_free(kcm_psockp, psock); 545ab7ac4ebSTom Herbert } 546ab7ac4ebSTom Herbert 547ab7ac4ebSTom Herbert /* Don't put back on available list */ 548ab7ac4ebSTom Herbert 549ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 550ab7ac4ebSTom Herbert 551ab7ac4ebSTom Herbert return; 552ab7ac4ebSTom Herbert } 553ab7ac4ebSTom Herbert 554ab7ac4ebSTom Herbert psock_now_avail(psock); 555ab7ac4ebSTom Herbert 556ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 557ab7ac4ebSTom Herbert } 558ab7ac4ebSTom Herbert 559cd6e111bSTom Herbert static void kcm_report_tx_retry(struct kcm_sock *kcm) 560cd6e111bSTom Herbert { 561cd6e111bSTom Herbert struct kcm_mux *mux = kcm->mux; 562cd6e111bSTom Herbert 563cd6e111bSTom Herbert spin_lock_bh(&mux->lock); 564cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.tx_retries); 565cd6e111bSTom Herbert spin_unlock_bh(&mux->lock); 566cd6e111bSTom Herbert } 567cd6e111bSTom Herbert 568ab7ac4ebSTom Herbert /* Write any messages ready on the kcm socket. Called with kcm sock lock 569ab7ac4ebSTom Herbert * held. Return bytes actually sent or error. 570ab7ac4ebSTom Herbert */ 571ab7ac4ebSTom Herbert static int kcm_write_msgs(struct kcm_sock *kcm) 572ab7ac4ebSTom Herbert { 573ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 574ab7ac4ebSTom Herbert struct kcm_psock *psock; 575ab7ac4ebSTom Herbert struct sk_buff *skb, *head; 576ab7ac4ebSTom Herbert struct kcm_tx_msg *txm; 577ab7ac4ebSTom Herbert unsigned short fragidx, frag_offset; 578ab7ac4ebSTom Herbert unsigned int sent, total_sent = 0; 579ab7ac4ebSTom Herbert int ret = 0; 580ab7ac4ebSTom Herbert 581ab7ac4ebSTom Herbert kcm->tx_wait_more = false; 582ab7ac4ebSTom Herbert psock = kcm->tx_psock; 583ab7ac4ebSTom Herbert if (unlikely(psock && psock->tx_stopped)) { 584ab7ac4ebSTom Herbert /* A reserved psock was aborted asynchronously. Unreserve 585ab7ac4ebSTom Herbert * it and we'll retry the message. 586ab7ac4ebSTom Herbert */ 587ab7ac4ebSTom Herbert unreserve_psock(kcm); 588cd6e111bSTom Herbert kcm_report_tx_retry(kcm); 589ab7ac4ebSTom Herbert if (skb_queue_empty(&sk->sk_write_queue)) 590ab7ac4ebSTom Herbert return 0; 591ab7ac4ebSTom Herbert 592ab7ac4ebSTom Herbert kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; 593ab7ac4ebSTom Herbert 594ab7ac4ebSTom Herbert } else if (skb_queue_empty(&sk->sk_write_queue)) { 595ab7ac4ebSTom Herbert return 0; 596ab7ac4ebSTom Herbert } 597ab7ac4ebSTom Herbert 598ab7ac4ebSTom Herbert head = skb_peek(&sk->sk_write_queue); 599ab7ac4ebSTom Herbert txm = kcm_tx_msg(head); 600ab7ac4ebSTom Herbert 601ab7ac4ebSTom Herbert if (txm->sent) { 602ab7ac4ebSTom Herbert /* Send of first skbuff in queue already in progress */ 603ab7ac4ebSTom Herbert if (WARN_ON(!psock)) { 604ab7ac4ebSTom Herbert ret = -EINVAL; 605ab7ac4ebSTom Herbert goto out; 606ab7ac4ebSTom Herbert } 607ab7ac4ebSTom Herbert sent = txm->sent; 608ab7ac4ebSTom Herbert frag_offset = txm->frag_offset; 609ab7ac4ebSTom Herbert fragidx = txm->fragidx; 610ab7ac4ebSTom Herbert skb = txm->frag_skb; 611ab7ac4ebSTom Herbert 612ab7ac4ebSTom Herbert goto do_frag; 613ab7ac4ebSTom Herbert } 614ab7ac4ebSTom Herbert 615ab7ac4ebSTom Herbert try_again: 616ab7ac4ebSTom Herbert psock = reserve_psock(kcm); 617ab7ac4ebSTom Herbert if (!psock) 618ab7ac4ebSTom Herbert goto out; 619ab7ac4ebSTom Herbert 620ab7ac4ebSTom Herbert do { 621ab7ac4ebSTom Herbert skb = head; 622ab7ac4ebSTom Herbert txm = kcm_tx_msg(head); 623ab7ac4ebSTom Herbert sent = 0; 624ab7ac4ebSTom Herbert 625ab7ac4ebSTom Herbert do_frag_list: 626ab7ac4ebSTom Herbert if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { 627ab7ac4ebSTom Herbert ret = -EINVAL; 628ab7ac4ebSTom Herbert goto out; 629ab7ac4ebSTom Herbert } 630ab7ac4ebSTom Herbert 631ab7ac4ebSTom Herbert for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; 632ab7ac4ebSTom Herbert fragidx++) { 633ab7ac4ebSTom Herbert skb_frag_t *frag; 634ab7ac4ebSTom Herbert 635ab7ac4ebSTom Herbert frag_offset = 0; 636ab7ac4ebSTom Herbert do_frag: 637ab7ac4ebSTom Herbert frag = &skb_shinfo(skb)->frags[fragidx]; 638ab7ac4ebSTom Herbert if (WARN_ON(!frag->size)) { 639ab7ac4ebSTom Herbert ret = -EINVAL; 640ab7ac4ebSTom Herbert goto out; 641ab7ac4ebSTom Herbert } 642ab7ac4ebSTom Herbert 643ab7ac4ebSTom Herbert ret = kernel_sendpage(psock->sk->sk_socket, 644ab7ac4ebSTom Herbert frag->page.p, 645ab7ac4ebSTom Herbert frag->page_offset + frag_offset, 646ab7ac4ebSTom Herbert frag->size - frag_offset, 647ab7ac4ebSTom Herbert MSG_DONTWAIT); 648ab7ac4ebSTom Herbert if (ret <= 0) { 649ab7ac4ebSTom Herbert if (ret == -EAGAIN) { 650ab7ac4ebSTom Herbert /* Save state to try again when there's 651ab7ac4ebSTom Herbert * write space on the socket 652ab7ac4ebSTom Herbert */ 653ab7ac4ebSTom Herbert txm->sent = sent; 654ab7ac4ebSTom Herbert txm->frag_offset = frag_offset; 655ab7ac4ebSTom Herbert txm->fragidx = fragidx; 656ab7ac4ebSTom Herbert txm->frag_skb = skb; 657ab7ac4ebSTom Herbert 658ab7ac4ebSTom Herbert ret = 0; 659ab7ac4ebSTom Herbert goto out; 660ab7ac4ebSTom Herbert } 661ab7ac4ebSTom Herbert 662ab7ac4ebSTom Herbert /* Hard failure in sending message, abort this 663ab7ac4ebSTom Herbert * psock since it has lost framing 664ab7ac4ebSTom Herbert * synchonization and retry sending the 665ab7ac4ebSTom Herbert * message from the beginning. 666ab7ac4ebSTom Herbert */ 667ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, 668ab7ac4ebSTom Herbert true); 669ab7ac4ebSTom Herbert unreserve_psock(kcm); 670ab7ac4ebSTom Herbert 671ab7ac4ebSTom Herbert txm->sent = 0; 672cd6e111bSTom Herbert kcm_report_tx_retry(kcm); 673ab7ac4ebSTom Herbert ret = 0; 674ab7ac4ebSTom Herbert 675ab7ac4ebSTom Herbert goto try_again; 676ab7ac4ebSTom Herbert } 677ab7ac4ebSTom Herbert 678ab7ac4ebSTom Herbert sent += ret; 679ab7ac4ebSTom Herbert frag_offset += ret; 680cd6e111bSTom Herbert KCM_STATS_ADD(psock->stats.tx_bytes, ret); 681ab7ac4ebSTom Herbert if (frag_offset < frag->size) { 682ab7ac4ebSTom Herbert /* Not finished with this frag */ 683ab7ac4ebSTom Herbert goto do_frag; 684ab7ac4ebSTom Herbert } 685ab7ac4ebSTom Herbert } 686ab7ac4ebSTom Herbert 687ab7ac4ebSTom Herbert if (skb == head) { 688ab7ac4ebSTom Herbert if (skb_has_frag_list(skb)) { 689ab7ac4ebSTom Herbert skb = skb_shinfo(skb)->frag_list; 690ab7ac4ebSTom Herbert goto do_frag_list; 691ab7ac4ebSTom Herbert } 692ab7ac4ebSTom Herbert } else if (skb->next) { 693ab7ac4ebSTom Herbert skb = skb->next; 694ab7ac4ebSTom Herbert goto do_frag_list; 695ab7ac4ebSTom Herbert } 696ab7ac4ebSTom Herbert 697ab7ac4ebSTom Herbert /* Successfully sent the whole packet, account for it. */ 698ab7ac4ebSTom Herbert skb_dequeue(&sk->sk_write_queue); 699ab7ac4ebSTom Herbert kfree_skb(head); 700ab7ac4ebSTom Herbert sk->sk_wmem_queued -= sent; 701ab7ac4ebSTom Herbert total_sent += sent; 702cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.tx_msgs); 703ab7ac4ebSTom Herbert } while ((head = skb_peek(&sk->sk_write_queue))); 704ab7ac4ebSTom Herbert out: 705ab7ac4ebSTom Herbert if (!head) { 706ab7ac4ebSTom Herbert /* Done with all queued messages. */ 707ab7ac4ebSTom Herbert WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); 708ab7ac4ebSTom Herbert unreserve_psock(kcm); 709ab7ac4ebSTom Herbert } 710ab7ac4ebSTom Herbert 711ab7ac4ebSTom Herbert /* Check if write space is available */ 712ab7ac4ebSTom Herbert sk->sk_write_space(sk); 713ab7ac4ebSTom Herbert 714ab7ac4ebSTom Herbert return total_sent ? : ret; 715ab7ac4ebSTom Herbert } 716ab7ac4ebSTom Herbert 717ab7ac4ebSTom Herbert static void kcm_tx_work(struct work_struct *w) 718ab7ac4ebSTom Herbert { 719ab7ac4ebSTom Herbert struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); 720ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 721ab7ac4ebSTom Herbert int err; 722ab7ac4ebSTom Herbert 723ab7ac4ebSTom Herbert lock_sock(sk); 724ab7ac4ebSTom Herbert 725ab7ac4ebSTom Herbert /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx 726ab7ac4ebSTom Herbert * aborts 727ab7ac4ebSTom Herbert */ 728ab7ac4ebSTom Herbert err = kcm_write_msgs(kcm); 729ab7ac4ebSTom Herbert if (err < 0) { 730ab7ac4ebSTom Herbert /* Hard failure in write, report error on KCM socket */ 731ab7ac4ebSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); 732ab7ac4ebSTom Herbert report_csk_error(&kcm->sk, -err); 733ab7ac4ebSTom Herbert goto out; 734ab7ac4ebSTom Herbert } 735ab7ac4ebSTom Herbert 736ab7ac4ebSTom Herbert /* Primarily for SOCK_SEQPACKET sockets */ 737ab7ac4ebSTom Herbert if (likely(sk->sk_socket) && 738ab7ac4ebSTom Herbert test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 739ab7ac4ebSTom Herbert clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 740ab7ac4ebSTom Herbert sk->sk_write_space(sk); 741ab7ac4ebSTom Herbert } 742ab7ac4ebSTom Herbert 743ab7ac4ebSTom Herbert out: 744ab7ac4ebSTom Herbert release_sock(sk); 745ab7ac4ebSTom Herbert } 746ab7ac4ebSTom Herbert 747ab7ac4ebSTom Herbert static void kcm_push(struct kcm_sock *kcm) 748ab7ac4ebSTom Herbert { 749ab7ac4ebSTom Herbert if (kcm->tx_wait_more) 750ab7ac4ebSTom Herbert kcm_write_msgs(kcm); 751ab7ac4ebSTom Herbert } 752ab7ac4ebSTom Herbert 753f29698fcSTom Herbert static ssize_t kcm_sendpage(struct socket *sock, struct page *page, 754f29698fcSTom Herbert int offset, size_t size, int flags) 755f29698fcSTom Herbert 756f29698fcSTom Herbert { 757f29698fcSTom Herbert struct sock *sk = sock->sk; 758f29698fcSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 759f29698fcSTom Herbert struct sk_buff *skb = NULL, *head = NULL; 760f29698fcSTom Herbert long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 761f29698fcSTom Herbert bool eor; 762f29698fcSTom Herbert int err = 0; 763f29698fcSTom Herbert int i; 764f29698fcSTom Herbert 765f29698fcSTom Herbert if (flags & MSG_SENDPAGE_NOTLAST) 766f29698fcSTom Herbert flags |= MSG_MORE; 767f29698fcSTom Herbert 768f29698fcSTom Herbert /* No MSG_EOR from splice, only look at MSG_MORE */ 769f29698fcSTom Herbert eor = !(flags & MSG_MORE); 770f29698fcSTom Herbert 771f29698fcSTom Herbert lock_sock(sk); 772f29698fcSTom Herbert 773f29698fcSTom Herbert sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 774f29698fcSTom Herbert 775f29698fcSTom Herbert err = -EPIPE; 776f29698fcSTom Herbert if (sk->sk_err) 777f29698fcSTom Herbert goto out_error; 778f29698fcSTom Herbert 779f29698fcSTom Herbert if (kcm->seq_skb) { 780f29698fcSTom Herbert /* Previously opened message */ 781f29698fcSTom Herbert head = kcm->seq_skb; 782f29698fcSTom Herbert skb = kcm_tx_msg(head)->last_skb; 783f29698fcSTom Herbert i = skb_shinfo(skb)->nr_frags; 784f29698fcSTom Herbert 785f29698fcSTom Herbert if (skb_can_coalesce(skb, i, page, offset)) { 786f29698fcSTom Herbert skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); 787f29698fcSTom Herbert skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 788f29698fcSTom Herbert goto coalesced; 789f29698fcSTom Herbert } 790f29698fcSTom Herbert 791f29698fcSTom Herbert if (i >= MAX_SKB_FRAGS) { 792f29698fcSTom Herbert struct sk_buff *tskb; 793f29698fcSTom Herbert 794f29698fcSTom Herbert tskb = alloc_skb(0, sk->sk_allocation); 795f29698fcSTom Herbert while (!tskb) { 796f29698fcSTom Herbert kcm_push(kcm); 797f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 798f29698fcSTom Herbert if (err) 799f29698fcSTom Herbert goto out_error; 800f29698fcSTom Herbert } 801f29698fcSTom Herbert 802f29698fcSTom Herbert if (head == skb) 803f29698fcSTom Herbert skb_shinfo(head)->frag_list = tskb; 804f29698fcSTom Herbert else 805f29698fcSTom Herbert skb->next = tskb; 806f29698fcSTom Herbert 807f29698fcSTom Herbert skb = tskb; 808f29698fcSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 809f29698fcSTom Herbert i = 0; 810f29698fcSTom Herbert } 811f29698fcSTom Herbert } else { 812f29698fcSTom Herbert /* Call the sk_stream functions to manage the sndbuf mem. */ 813f29698fcSTom Herbert if (!sk_stream_memory_free(sk)) { 814f29698fcSTom Herbert kcm_push(kcm); 815f29698fcSTom Herbert set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 816f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 817f29698fcSTom Herbert if (err) 818f29698fcSTom Herbert goto out_error; 819f29698fcSTom Herbert } 820f29698fcSTom Herbert 821f29698fcSTom Herbert head = alloc_skb(0, sk->sk_allocation); 822f29698fcSTom Herbert while (!head) { 823f29698fcSTom Herbert kcm_push(kcm); 824f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 825f29698fcSTom Herbert if (err) 826f29698fcSTom Herbert goto out_error; 827f29698fcSTom Herbert } 828f29698fcSTom Herbert 829f29698fcSTom Herbert skb = head; 830f29698fcSTom Herbert i = 0; 831f29698fcSTom Herbert } 832f29698fcSTom Herbert 833f29698fcSTom Herbert get_page(page); 834f29698fcSTom Herbert skb_fill_page_desc(skb, i, page, offset, size); 835f29698fcSTom Herbert skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 836f29698fcSTom Herbert 837f29698fcSTom Herbert coalesced: 838f29698fcSTom Herbert skb->len += size; 839f29698fcSTom Herbert skb->data_len += size; 840f29698fcSTom Herbert skb->truesize += size; 841f29698fcSTom Herbert sk->sk_wmem_queued += size; 842f29698fcSTom Herbert sk_mem_charge(sk, size); 843f29698fcSTom Herbert 844f29698fcSTom Herbert if (head != skb) { 845f29698fcSTom Herbert head->len += size; 846f29698fcSTom Herbert head->data_len += size; 847f29698fcSTom Herbert head->truesize += size; 848f29698fcSTom Herbert } 849f29698fcSTom Herbert 850f29698fcSTom Herbert if (eor) { 851f29698fcSTom Herbert bool not_busy = skb_queue_empty(&sk->sk_write_queue); 852f29698fcSTom Herbert 853f29698fcSTom Herbert /* Message complete, queue it on send buffer */ 854f29698fcSTom Herbert __skb_queue_tail(&sk->sk_write_queue, head); 855f29698fcSTom Herbert kcm->seq_skb = NULL; 856f29698fcSTom Herbert KCM_STATS_INCR(kcm->stats.tx_msgs); 857f29698fcSTom Herbert 858f29698fcSTom Herbert if (flags & MSG_BATCH) { 859f29698fcSTom Herbert kcm->tx_wait_more = true; 860f29698fcSTom Herbert } else if (kcm->tx_wait_more || not_busy) { 861f29698fcSTom Herbert err = kcm_write_msgs(kcm); 862f29698fcSTom Herbert if (err < 0) { 863f29698fcSTom Herbert /* We got a hard error in write_msgs but have 864f29698fcSTom Herbert * already queued this message. Report an error 865f29698fcSTom Herbert * in the socket, but don't affect return value 866f29698fcSTom Herbert * from sendmsg 867f29698fcSTom Herbert */ 868f29698fcSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs\n"); 869f29698fcSTom Herbert report_csk_error(&kcm->sk, -err); 870f29698fcSTom Herbert } 871f29698fcSTom Herbert } 872f29698fcSTom Herbert } else { 873f29698fcSTom Herbert /* Message not complete, save state */ 874f29698fcSTom Herbert kcm->seq_skb = head; 875f29698fcSTom Herbert kcm_tx_msg(head)->last_skb = skb; 876f29698fcSTom Herbert } 877f29698fcSTom Herbert 878f29698fcSTom Herbert KCM_STATS_ADD(kcm->stats.tx_bytes, size); 879f29698fcSTom Herbert 880f29698fcSTom Herbert release_sock(sk); 881f29698fcSTom Herbert return size; 882f29698fcSTom Herbert 883f29698fcSTom Herbert out_error: 884f29698fcSTom Herbert kcm_push(kcm); 885f29698fcSTom Herbert 886f29698fcSTom Herbert err = sk_stream_error(sk, flags, err); 887f29698fcSTom Herbert 888f29698fcSTom Herbert /* make sure we wake any epoll edge trigger waiter */ 889f29698fcSTom Herbert if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 890f29698fcSTom Herbert sk->sk_write_space(sk); 891f29698fcSTom Herbert 892f29698fcSTom Herbert release_sock(sk); 893f29698fcSTom Herbert return err; 894f29698fcSTom Herbert } 895f29698fcSTom Herbert 896ab7ac4ebSTom Herbert static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 897ab7ac4ebSTom Herbert { 898ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 899ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 900ab7ac4ebSTom Herbert struct sk_buff *skb = NULL, *head = NULL; 901ab7ac4ebSTom Herbert size_t copy, copied = 0; 902ab7ac4ebSTom Herbert long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 903ab7ac4ebSTom Herbert int eor = (sock->type == SOCK_DGRAM) ? 904ab7ac4ebSTom Herbert !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); 905ab7ac4ebSTom Herbert int err = -EPIPE; 906ab7ac4ebSTom Herbert 907ab7ac4ebSTom Herbert lock_sock(sk); 908ab7ac4ebSTom Herbert 909ab7ac4ebSTom Herbert /* Per tcp_sendmsg this should be in poll */ 910ab7ac4ebSTom Herbert sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 911ab7ac4ebSTom Herbert 912ab7ac4ebSTom Herbert if (sk->sk_err) 913ab7ac4ebSTom Herbert goto out_error; 914ab7ac4ebSTom Herbert 915ab7ac4ebSTom Herbert if (kcm->seq_skb) { 916ab7ac4ebSTom Herbert /* Previously opened message */ 917ab7ac4ebSTom Herbert head = kcm->seq_skb; 918ab7ac4ebSTom Herbert skb = kcm_tx_msg(head)->last_skb; 919ab7ac4ebSTom Herbert goto start; 920ab7ac4ebSTom Herbert } 921ab7ac4ebSTom Herbert 922ab7ac4ebSTom Herbert /* Call the sk_stream functions to manage the sndbuf mem. */ 923ab7ac4ebSTom Herbert if (!sk_stream_memory_free(sk)) { 924ab7ac4ebSTom Herbert kcm_push(kcm); 925ab7ac4ebSTom Herbert set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 926ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 927ab7ac4ebSTom Herbert if (err) 928ab7ac4ebSTom Herbert goto out_error; 929ab7ac4ebSTom Herbert } 930ab7ac4ebSTom Herbert 93198e3862cSWANG Cong if (msg_data_left(msg)) { 932ab7ac4ebSTom Herbert /* New message, alloc head skb */ 933ab7ac4ebSTom Herbert head = alloc_skb(0, sk->sk_allocation); 934ab7ac4ebSTom Herbert while (!head) { 935ab7ac4ebSTom Herbert kcm_push(kcm); 936ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 937ab7ac4ebSTom Herbert if (err) 938ab7ac4ebSTom Herbert goto out_error; 939ab7ac4ebSTom Herbert 940ab7ac4ebSTom Herbert head = alloc_skb(0, sk->sk_allocation); 941ab7ac4ebSTom Herbert } 942ab7ac4ebSTom Herbert 943ab7ac4ebSTom Herbert skb = head; 944ab7ac4ebSTom Herbert 945ab7ac4ebSTom Herbert /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling 946ab7ac4ebSTom Herbert * csum_and_copy_from_iter from skb_do_copy_data_nocache. 947ab7ac4ebSTom Herbert */ 948ab7ac4ebSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 94998e3862cSWANG Cong } 950ab7ac4ebSTom Herbert 951ab7ac4ebSTom Herbert start: 952ab7ac4ebSTom Herbert while (msg_data_left(msg)) { 953ab7ac4ebSTom Herbert bool merge = true; 954ab7ac4ebSTom Herbert int i = skb_shinfo(skb)->nr_frags; 955ab7ac4ebSTom Herbert struct page_frag *pfrag = sk_page_frag(sk); 956ab7ac4ebSTom Herbert 957ab7ac4ebSTom Herbert if (!sk_page_frag_refill(sk, pfrag)) 958ab7ac4ebSTom Herbert goto wait_for_memory; 959ab7ac4ebSTom Herbert 960ab7ac4ebSTom Herbert if (!skb_can_coalesce(skb, i, pfrag->page, 961ab7ac4ebSTom Herbert pfrag->offset)) { 962ab7ac4ebSTom Herbert if (i == MAX_SKB_FRAGS) { 963ab7ac4ebSTom Herbert struct sk_buff *tskb; 964ab7ac4ebSTom Herbert 965ab7ac4ebSTom Herbert tskb = alloc_skb(0, sk->sk_allocation); 966ab7ac4ebSTom Herbert if (!tskb) 967ab7ac4ebSTom Herbert goto wait_for_memory; 968ab7ac4ebSTom Herbert 969ab7ac4ebSTom Herbert if (head == skb) 970ab7ac4ebSTom Herbert skb_shinfo(head)->frag_list = tskb; 971ab7ac4ebSTom Herbert else 972ab7ac4ebSTom Herbert skb->next = tskb; 973ab7ac4ebSTom Herbert 974ab7ac4ebSTom Herbert skb = tskb; 975ab7ac4ebSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 976ab7ac4ebSTom Herbert continue; 977ab7ac4ebSTom Herbert } 978ab7ac4ebSTom Herbert merge = false; 979ab7ac4ebSTom Herbert } 980ab7ac4ebSTom Herbert 981ab7ac4ebSTom Herbert copy = min_t(int, msg_data_left(msg), 982ab7ac4ebSTom Herbert pfrag->size - pfrag->offset); 983ab7ac4ebSTom Herbert 984ab7ac4ebSTom Herbert if (!sk_wmem_schedule(sk, copy)) 985ab7ac4ebSTom Herbert goto wait_for_memory; 986ab7ac4ebSTom Herbert 987ab7ac4ebSTom Herbert err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, 988ab7ac4ebSTom Herbert pfrag->page, 989ab7ac4ebSTom Herbert pfrag->offset, 990ab7ac4ebSTom Herbert copy); 991ab7ac4ebSTom Herbert if (err) 992ab7ac4ebSTom Herbert goto out_error; 993ab7ac4ebSTom Herbert 994ab7ac4ebSTom Herbert /* Update the skb. */ 995ab7ac4ebSTom Herbert if (merge) { 996ab7ac4ebSTom Herbert skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 997ab7ac4ebSTom Herbert } else { 998ab7ac4ebSTom Herbert skb_fill_page_desc(skb, i, pfrag->page, 999ab7ac4ebSTom Herbert pfrag->offset, copy); 1000ab7ac4ebSTom Herbert get_page(pfrag->page); 1001ab7ac4ebSTom Herbert } 1002ab7ac4ebSTom Herbert 1003ab7ac4ebSTom Herbert pfrag->offset += copy; 1004ab7ac4ebSTom Herbert copied += copy; 1005ab7ac4ebSTom Herbert if (head != skb) { 1006ab7ac4ebSTom Herbert head->len += copy; 1007ab7ac4ebSTom Herbert head->data_len += copy; 1008ab7ac4ebSTom Herbert } 1009ab7ac4ebSTom Herbert 1010ab7ac4ebSTom Herbert continue; 1011ab7ac4ebSTom Herbert 1012ab7ac4ebSTom Herbert wait_for_memory: 1013ab7ac4ebSTom Herbert kcm_push(kcm); 1014ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 1015ab7ac4ebSTom Herbert if (err) 1016ab7ac4ebSTom Herbert goto out_error; 1017ab7ac4ebSTom Herbert } 1018ab7ac4ebSTom Herbert 1019ab7ac4ebSTom Herbert if (eor) { 1020ab7ac4ebSTom Herbert bool not_busy = skb_queue_empty(&sk->sk_write_queue); 1021ab7ac4ebSTom Herbert 102298e3862cSWANG Cong if (head) { 1023ab7ac4ebSTom Herbert /* Message complete, queue it on send buffer */ 1024ab7ac4ebSTom Herbert __skb_queue_tail(&sk->sk_write_queue, head); 1025ab7ac4ebSTom Herbert kcm->seq_skb = NULL; 1026cd6e111bSTom Herbert KCM_STATS_INCR(kcm->stats.tx_msgs); 102798e3862cSWANG Cong } 1028ab7ac4ebSTom Herbert 1029ab7ac4ebSTom Herbert if (msg->msg_flags & MSG_BATCH) { 1030ab7ac4ebSTom Herbert kcm->tx_wait_more = true; 1031ab7ac4ebSTom Herbert } else if (kcm->tx_wait_more || not_busy) { 1032ab7ac4ebSTom Herbert err = kcm_write_msgs(kcm); 1033ab7ac4ebSTom Herbert if (err < 0) { 1034ab7ac4ebSTom Herbert /* We got a hard error in write_msgs but have 1035ab7ac4ebSTom Herbert * already queued this message. Report an error 1036ab7ac4ebSTom Herbert * in the socket, but don't affect return value 1037ab7ac4ebSTom Herbert * from sendmsg 1038ab7ac4ebSTom Herbert */ 1039ab7ac4ebSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs\n"); 1040ab7ac4ebSTom Herbert report_csk_error(&kcm->sk, -err); 1041ab7ac4ebSTom Herbert } 1042ab7ac4ebSTom Herbert } 1043ab7ac4ebSTom Herbert } else { 1044ab7ac4ebSTom Herbert /* Message not complete, save state */ 1045ab7ac4ebSTom Herbert partial_message: 1046cd27b96bSWANG Cong if (head) { 1047ab7ac4ebSTom Herbert kcm->seq_skb = head; 1048ab7ac4ebSTom Herbert kcm_tx_msg(head)->last_skb = skb; 1049ab7ac4ebSTom Herbert } 1050cd27b96bSWANG Cong } 1051ab7ac4ebSTom Herbert 1052cd6e111bSTom Herbert KCM_STATS_ADD(kcm->stats.tx_bytes, copied); 1053cd6e111bSTom Herbert 1054ab7ac4ebSTom Herbert release_sock(sk); 1055ab7ac4ebSTom Herbert return copied; 1056ab7ac4ebSTom Herbert 1057ab7ac4ebSTom Herbert out_error: 1058ab7ac4ebSTom Herbert kcm_push(kcm); 1059ab7ac4ebSTom Herbert 1060ab7ac4ebSTom Herbert if (copied && sock->type == SOCK_SEQPACKET) { 1061ab7ac4ebSTom Herbert /* Wrote some bytes before encountering an 1062ab7ac4ebSTom Herbert * error, return partial success. 1063ab7ac4ebSTom Herbert */ 1064ab7ac4ebSTom Herbert goto partial_message; 1065ab7ac4ebSTom Herbert } 1066ab7ac4ebSTom Herbert 1067ab7ac4ebSTom Herbert if (head != kcm->seq_skb) 1068ab7ac4ebSTom Herbert kfree_skb(head); 1069ab7ac4ebSTom Herbert 1070ab7ac4ebSTom Herbert err = sk_stream_error(sk, msg->msg_flags, err); 1071ab7ac4ebSTom Herbert 1072ab7ac4ebSTom Herbert /* make sure we wake any epoll edge trigger waiter */ 1073ab7ac4ebSTom Herbert if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 1074ab7ac4ebSTom Herbert sk->sk_write_space(sk); 1075ab7ac4ebSTom Herbert 1076ab7ac4ebSTom Herbert release_sock(sk); 1077ab7ac4ebSTom Herbert return err; 1078ab7ac4ebSTom Herbert } 1079ab7ac4ebSTom Herbert 1080ab7ac4ebSTom Herbert static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, 1081ab7ac4ebSTom Herbert long timeo, int *err) 1082ab7ac4ebSTom Herbert { 1083ab7ac4ebSTom Herbert struct sk_buff *skb; 1084ab7ac4ebSTom Herbert 1085ab7ac4ebSTom Herbert while (!(skb = skb_peek(&sk->sk_receive_queue))) { 1086ab7ac4ebSTom Herbert if (sk->sk_err) { 1087ab7ac4ebSTom Herbert *err = sock_error(sk); 1088ab7ac4ebSTom Herbert return NULL; 1089ab7ac4ebSTom Herbert } 1090ab7ac4ebSTom Herbert 1091ab7ac4ebSTom Herbert if (sock_flag(sk, SOCK_DONE)) 1092ab7ac4ebSTom Herbert return NULL; 1093ab7ac4ebSTom Herbert 1094ab7ac4ebSTom Herbert if ((flags & MSG_DONTWAIT) || !timeo) { 1095ab7ac4ebSTom Herbert *err = -EAGAIN; 1096ab7ac4ebSTom Herbert return NULL; 1097ab7ac4ebSTom Herbert } 1098ab7ac4ebSTom Herbert 1099ab7ac4ebSTom Herbert sk_wait_data(sk, &timeo, NULL); 1100ab7ac4ebSTom Herbert 1101ab7ac4ebSTom Herbert /* Handle signals */ 1102ab7ac4ebSTom Herbert if (signal_pending(current)) { 1103ab7ac4ebSTom Herbert *err = sock_intr_errno(timeo); 1104ab7ac4ebSTom Herbert return NULL; 1105ab7ac4ebSTom Herbert } 1106ab7ac4ebSTom Herbert } 1107ab7ac4ebSTom Herbert 1108ab7ac4ebSTom Herbert return skb; 1109ab7ac4ebSTom Herbert } 1110ab7ac4ebSTom Herbert 1111ab7ac4ebSTom Herbert static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, 1112ab7ac4ebSTom Herbert size_t len, int flags) 1113ab7ac4ebSTom Herbert { 1114ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 1115cd6e111bSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 1116ab7ac4ebSTom Herbert int err = 0; 1117ab7ac4ebSTom Herbert long timeo; 1118bbb03029STom Herbert struct strp_msg *stm; 1119ab7ac4ebSTom Herbert int copied = 0; 1120ab7ac4ebSTom Herbert struct sk_buff *skb; 1121ab7ac4ebSTom Herbert 1122ab7ac4ebSTom Herbert timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1123ab7ac4ebSTom Herbert 1124ab7ac4ebSTom Herbert lock_sock(sk); 1125ab7ac4ebSTom Herbert 1126ab7ac4ebSTom Herbert skb = kcm_wait_data(sk, flags, timeo, &err); 1127ab7ac4ebSTom Herbert if (!skb) 1128ab7ac4ebSTom Herbert goto out; 1129ab7ac4ebSTom Herbert 1130ab7ac4ebSTom Herbert /* Okay, have a message on the receive queue */ 1131ab7ac4ebSTom Herbert 1132bbb03029STom Herbert stm = strp_msg(skb); 1133ab7ac4ebSTom Herbert 1134bbb03029STom Herbert if (len > stm->full_len) 1135bbb03029STom Herbert len = stm->full_len; 1136ab7ac4ebSTom Herbert 1137bbb03029STom Herbert err = skb_copy_datagram_msg(skb, stm->offset, msg, len); 1138ab7ac4ebSTom Herbert if (err < 0) 1139ab7ac4ebSTom Herbert goto out; 1140ab7ac4ebSTom Herbert 1141ab7ac4ebSTom Herbert copied = len; 1142ab7ac4ebSTom Herbert if (likely(!(flags & MSG_PEEK))) { 1143cd6e111bSTom Herbert KCM_STATS_ADD(kcm->stats.rx_bytes, copied); 1144bbb03029STom Herbert if (copied < stm->full_len) { 1145ab7ac4ebSTom Herbert if (sock->type == SOCK_DGRAM) { 1146ab7ac4ebSTom Herbert /* Truncated message */ 1147ab7ac4ebSTom Herbert msg->msg_flags |= MSG_TRUNC; 1148ab7ac4ebSTom Herbert goto msg_finished; 1149ab7ac4ebSTom Herbert } 1150bbb03029STom Herbert stm->offset += copied; 1151bbb03029STom Herbert stm->full_len -= copied; 1152ab7ac4ebSTom Herbert } else { 1153ab7ac4ebSTom Herbert msg_finished: 1154ab7ac4ebSTom Herbert /* Finished with message */ 1155ab7ac4ebSTom Herbert msg->msg_flags |= MSG_EOR; 1156cd6e111bSTom Herbert KCM_STATS_INCR(kcm->stats.rx_msgs); 1157ab7ac4ebSTom Herbert skb_unlink(skb, &sk->sk_receive_queue); 1158ab7ac4ebSTom Herbert kfree_skb(skb); 1159ab7ac4ebSTom Herbert } 1160ab7ac4ebSTom Herbert } 1161ab7ac4ebSTom Herbert 1162ab7ac4ebSTom Herbert out: 1163ab7ac4ebSTom Herbert release_sock(sk); 1164ab7ac4ebSTom Herbert 1165ab7ac4ebSTom Herbert return copied ? : err; 1166ab7ac4ebSTom Herbert } 1167ab7ac4ebSTom Herbert 116891687355STom Herbert static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, 116991687355STom Herbert struct pipe_inode_info *pipe, size_t len, 117091687355STom Herbert unsigned int flags) 117191687355STom Herbert { 117291687355STom Herbert struct sock *sk = sock->sk; 117391687355STom Herbert struct kcm_sock *kcm = kcm_sk(sk); 117491687355STom Herbert long timeo; 1175bbb03029STom Herbert struct strp_msg *stm; 117691687355STom Herbert int err = 0; 1177f1971a2eSWANG Cong ssize_t copied; 117891687355STom Herbert struct sk_buff *skb; 117991687355STom Herbert 118091687355STom Herbert /* Only support splice for SOCKSEQPACKET */ 118191687355STom Herbert 118291687355STom Herbert timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 118391687355STom Herbert 118491687355STom Herbert lock_sock(sk); 118591687355STom Herbert 118691687355STom Herbert skb = kcm_wait_data(sk, flags, timeo, &err); 118791687355STom Herbert if (!skb) 118891687355STom Herbert goto err_out; 118991687355STom Herbert 119091687355STom Herbert /* Okay, have a message on the receive queue */ 119191687355STom Herbert 1192bbb03029STom Herbert stm = strp_msg(skb); 119391687355STom Herbert 1194bbb03029STom Herbert if (len > stm->full_len) 1195bbb03029STom Herbert len = stm->full_len; 119691687355STom Herbert 1197bbb03029STom Herbert copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags); 119891687355STom Herbert if (copied < 0) { 119991687355STom Herbert err = copied; 120091687355STom Herbert goto err_out; 120191687355STom Herbert } 120291687355STom Herbert 120391687355STom Herbert KCM_STATS_ADD(kcm->stats.rx_bytes, copied); 120491687355STom Herbert 1205bbb03029STom Herbert stm->offset += copied; 1206bbb03029STom Herbert stm->full_len -= copied; 120791687355STom Herbert 120891687355STom Herbert /* We have no way to return MSG_EOR. If all the bytes have been 120991687355STom Herbert * read we still leave the message in the receive socket buffer. 121091687355STom Herbert * A subsequent recvmsg needs to be done to return MSG_EOR and 121191687355STom Herbert * finish reading the message. 121291687355STom Herbert */ 121391687355STom Herbert 121491687355STom Herbert release_sock(sk); 121591687355STom Herbert 121691687355STom Herbert return copied; 121791687355STom Herbert 121891687355STom Herbert err_out: 121991687355STom Herbert release_sock(sk); 122091687355STom Herbert 122191687355STom Herbert return err; 122291687355STom Herbert } 122391687355STom Herbert 1224ab7ac4ebSTom Herbert /* kcm sock lock held */ 1225ab7ac4ebSTom Herbert static void kcm_recv_disable(struct kcm_sock *kcm) 1226ab7ac4ebSTom Herbert { 1227ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1228ab7ac4ebSTom Herbert 1229ab7ac4ebSTom Herbert if (kcm->rx_disabled) 1230ab7ac4ebSTom Herbert return; 1231ab7ac4ebSTom Herbert 1232ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1233ab7ac4ebSTom Herbert 1234ab7ac4ebSTom Herbert kcm->rx_disabled = 1; 1235ab7ac4ebSTom Herbert 1236ab7ac4ebSTom Herbert /* If a psock is reserved we'll do cleanup in unreserve */ 1237ab7ac4ebSTom Herbert if (!kcm->rx_psock) { 1238ab7ac4ebSTom Herbert if (kcm->rx_wait) { 1239ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 1240ab7ac4ebSTom Herbert kcm->rx_wait = false; 1241ab7ac4ebSTom Herbert } 1242ab7ac4ebSTom Herbert 1243ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); 1244ab7ac4ebSTom Herbert } 1245ab7ac4ebSTom Herbert 1246ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1247ab7ac4ebSTom Herbert } 1248ab7ac4ebSTom Herbert 1249ab7ac4ebSTom Herbert /* kcm sock lock held */ 1250ab7ac4ebSTom Herbert static void kcm_recv_enable(struct kcm_sock *kcm) 1251ab7ac4ebSTom Herbert { 1252ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1253ab7ac4ebSTom Herbert 1254ab7ac4ebSTom Herbert if (!kcm->rx_disabled) 1255ab7ac4ebSTom Herbert return; 1256ab7ac4ebSTom Herbert 1257ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1258ab7ac4ebSTom Herbert 1259ab7ac4ebSTom Herbert kcm->rx_disabled = 0; 1260ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 1261ab7ac4ebSTom Herbert 1262ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1263ab7ac4ebSTom Herbert } 1264ab7ac4ebSTom Herbert 1265ab7ac4ebSTom Herbert static int kcm_setsockopt(struct socket *sock, int level, int optname, 1266ab7ac4ebSTom Herbert char __user *optval, unsigned int optlen) 1267ab7ac4ebSTom Herbert { 1268ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1269ab7ac4ebSTom Herbert int val, valbool; 1270ab7ac4ebSTom Herbert int err = 0; 1271ab7ac4ebSTom Herbert 1272ab7ac4ebSTom Herbert if (level != SOL_KCM) 1273ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1274ab7ac4ebSTom Herbert 1275ab7ac4ebSTom Herbert if (optlen < sizeof(int)) 1276ab7ac4ebSTom Herbert return -EINVAL; 1277ab7ac4ebSTom Herbert 1278ab7ac4ebSTom Herbert if (get_user(val, (int __user *)optval)) 1279ab7ac4ebSTom Herbert return -EINVAL; 1280ab7ac4ebSTom Herbert 1281ab7ac4ebSTom Herbert valbool = val ? 1 : 0; 1282ab7ac4ebSTom Herbert 1283ab7ac4ebSTom Herbert switch (optname) { 1284ab7ac4ebSTom Herbert case KCM_RECV_DISABLE: 1285ab7ac4ebSTom Herbert lock_sock(&kcm->sk); 1286ab7ac4ebSTom Herbert if (valbool) 1287ab7ac4ebSTom Herbert kcm_recv_disable(kcm); 1288ab7ac4ebSTom Herbert else 1289ab7ac4ebSTom Herbert kcm_recv_enable(kcm); 1290ab7ac4ebSTom Herbert release_sock(&kcm->sk); 1291ab7ac4ebSTom Herbert break; 1292ab7ac4ebSTom Herbert default: 1293ab7ac4ebSTom Herbert err = -ENOPROTOOPT; 1294ab7ac4ebSTom Herbert } 1295ab7ac4ebSTom Herbert 1296ab7ac4ebSTom Herbert return err; 1297ab7ac4ebSTom Herbert } 1298ab7ac4ebSTom Herbert 1299ab7ac4ebSTom Herbert static int kcm_getsockopt(struct socket *sock, int level, int optname, 1300ab7ac4ebSTom Herbert char __user *optval, int __user *optlen) 1301ab7ac4ebSTom Herbert { 1302ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1303ab7ac4ebSTom Herbert int val, len; 1304ab7ac4ebSTom Herbert 1305ab7ac4ebSTom Herbert if (level != SOL_KCM) 1306ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1307ab7ac4ebSTom Herbert 1308ab7ac4ebSTom Herbert if (get_user(len, optlen)) 1309ab7ac4ebSTom Herbert return -EFAULT; 1310ab7ac4ebSTom Herbert 1311ab7ac4ebSTom Herbert len = min_t(unsigned int, len, sizeof(int)); 1312ab7ac4ebSTom Herbert if (len < 0) 1313ab7ac4ebSTom Herbert return -EINVAL; 1314ab7ac4ebSTom Herbert 1315ab7ac4ebSTom Herbert switch (optname) { 1316ab7ac4ebSTom Herbert case KCM_RECV_DISABLE: 1317ab7ac4ebSTom Herbert val = kcm->rx_disabled; 1318ab7ac4ebSTom Herbert break; 1319ab7ac4ebSTom Herbert default: 1320ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1321ab7ac4ebSTom Herbert } 1322ab7ac4ebSTom Herbert 1323ab7ac4ebSTom Herbert if (put_user(len, optlen)) 1324ab7ac4ebSTom Herbert return -EFAULT; 1325ab7ac4ebSTom Herbert if (copy_to_user(optval, &val, len)) 1326ab7ac4ebSTom Herbert return -EFAULT; 1327ab7ac4ebSTom Herbert return 0; 1328ab7ac4ebSTom Herbert } 1329ab7ac4ebSTom Herbert 1330ab7ac4ebSTom Herbert static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) 1331ab7ac4ebSTom Herbert { 1332ab7ac4ebSTom Herbert struct kcm_sock *tkcm; 1333ab7ac4ebSTom Herbert struct list_head *head; 1334ab7ac4ebSTom Herbert int index = 0; 1335ab7ac4ebSTom Herbert 1336a11e1d43SLinus Torvalds /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so 1337a11e1d43SLinus Torvalds * we set sk_state, otherwise epoll_wait always returns right away with 1338a11e1d43SLinus Torvalds * EPOLLHUP 1339ab7ac4ebSTom Herbert */ 1340ab7ac4ebSTom Herbert kcm->sk.sk_state = TCP_ESTABLISHED; 1341ab7ac4ebSTom Herbert 1342ab7ac4ebSTom Herbert /* Add to mux's kcm sockets list */ 1343ab7ac4ebSTom Herbert kcm->mux = mux; 1344ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1345ab7ac4ebSTom Herbert 1346ab7ac4ebSTom Herbert head = &mux->kcm_socks; 1347ab7ac4ebSTom Herbert list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { 1348ab7ac4ebSTom Herbert if (tkcm->index != index) 1349ab7ac4ebSTom Herbert break; 1350ab7ac4ebSTom Herbert head = &tkcm->kcm_sock_list; 1351ab7ac4ebSTom Herbert index++; 1352ab7ac4ebSTom Herbert } 1353ab7ac4ebSTom Herbert 1354ab7ac4ebSTom Herbert list_add(&kcm->kcm_sock_list, head); 1355ab7ac4ebSTom Herbert kcm->index = index; 1356ab7ac4ebSTom Herbert 1357ab7ac4ebSTom Herbert mux->kcm_socks_cnt++; 1358ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1359ab7ac4ebSTom Herbert 1360ab7ac4ebSTom Herbert INIT_WORK(&kcm->tx_work, kcm_tx_work); 1361ab7ac4ebSTom Herbert 1362ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1363ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 1364ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1365ab7ac4ebSTom Herbert } 1366ab7ac4ebSTom Herbert 1367ab7ac4ebSTom Herbert static int kcm_attach(struct socket *sock, struct socket *csock, 1368ab7ac4ebSTom Herbert struct bpf_prog *prog) 1369ab7ac4ebSTom Herbert { 1370ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1371ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1372ab7ac4ebSTom Herbert struct sock *csk; 1373ab7ac4ebSTom Herbert struct kcm_psock *psock = NULL, *tpsock; 1374ab7ac4ebSTom Herbert struct list_head *head; 1375ab7ac4ebSTom Herbert int index = 0; 13763fd87127SEric Biggers static const struct strp_callbacks cb = { 13773fd87127SEric Biggers .rcv_msg = kcm_rcv_strparser, 13783fd87127SEric Biggers .parse_msg = kcm_parse_func_strparser, 13793fd87127SEric Biggers .read_sock_done = kcm_read_sock_done, 13803fd87127SEric Biggers }; 13812cc683e8STom Herbert int err = 0; 1382ab7ac4ebSTom Herbert 1383ab7ac4ebSTom Herbert csk = csock->sk; 1384ab7ac4ebSTom Herbert if (!csk) 1385ab7ac4ebSTom Herbert return -EINVAL; 1386ab7ac4ebSTom Herbert 13872cc683e8STom Herbert lock_sock(csk); 13882cc683e8STom Herbert 1389581e7226STom Herbert /* Only allow TCP sockets to be attached for now */ 1390581e7226STom Herbert if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || 13912cc683e8STom Herbert csk->sk_protocol != IPPROTO_TCP) { 13922cc683e8STom Herbert err = -EOPNOTSUPP; 13932cc683e8STom Herbert goto out; 13942cc683e8STom Herbert } 1395581e7226STom Herbert 1396581e7226STom Herbert /* Don't allow listeners or closed sockets */ 13972cc683e8STom Herbert if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) { 13982cc683e8STom Herbert err = -EOPNOTSUPP; 13992cc683e8STom Herbert goto out; 14002cc683e8STom Herbert } 1401351050ecSEric Dumazet 1402ab7ac4ebSTom Herbert psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); 14032cc683e8STom Herbert if (!psock) { 14042cc683e8STom Herbert err = -ENOMEM; 14052cc683e8STom Herbert goto out; 14062cc683e8STom Herbert } 1407ab7ac4ebSTom Herbert 1408ab7ac4ebSTom Herbert psock->mux = mux; 1409ab7ac4ebSTom Herbert psock->sk = csk; 1410ab7ac4ebSTom Herbert psock->bpf_prog = prog; 141129152a34STom Herbert 141296a59083STom Herbert err = strp_init(&psock->strp, csk, &cb); 141396a59083STom Herbert if (err) { 141496a59083STom Herbert kmem_cache_free(kcm_psockp, psock); 14152cc683e8STom Herbert goto out; 141696a59083STom Herbert } 1417ab7ac4ebSTom Herbert 1418ab7ac4ebSTom Herbert write_lock_bh(&csk->sk_callback_lock); 1419e5571240STom Herbert 1420e5571240STom Herbert /* Check if sk_user_data is aready by KCM or someone else. 1421e5571240STom Herbert * Must be done under lock to prevent race conditions. 1422e5571240STom Herbert */ 1423e5571240STom Herbert if (csk->sk_user_data) { 1424e5571240STom Herbert write_unlock_bh(&csk->sk_callback_lock); 1425dff8baa2STom Herbert strp_stop(&psock->strp); 1426e5571240STom Herbert strp_done(&psock->strp); 1427e5571240STom Herbert kmem_cache_free(kcm_psockp, psock); 14282cc683e8STom Herbert err = -EALREADY; 14292cc683e8STom Herbert goto out; 1430e5571240STom Herbert } 1431e5571240STom Herbert 1432ab7ac4ebSTom Herbert psock->save_data_ready = csk->sk_data_ready; 1433ab7ac4ebSTom Herbert psock->save_write_space = csk->sk_write_space; 1434ab7ac4ebSTom Herbert psock->save_state_change = csk->sk_state_change; 1435ab7ac4ebSTom Herbert csk->sk_user_data = psock; 143696a59083STom Herbert csk->sk_data_ready = psock_data_ready; 143796a59083STom Herbert csk->sk_write_space = psock_write_space; 143896a59083STom Herbert csk->sk_state_change = psock_state_change; 1439e5571240STom Herbert 1440ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 1441ab7ac4ebSTom Herbert 1442e5571240STom Herbert sock_hold(csk); 1443e5571240STom Herbert 1444ab7ac4ebSTom Herbert /* Finished initialization, now add the psock to the MUX. */ 1445ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1446ab7ac4ebSTom Herbert head = &mux->psocks; 1447ab7ac4ebSTom Herbert list_for_each_entry(tpsock, &mux->psocks, psock_list) { 1448ab7ac4ebSTom Herbert if (tpsock->index != index) 1449ab7ac4ebSTom Herbert break; 1450ab7ac4ebSTom Herbert head = &tpsock->psock_list; 1451ab7ac4ebSTom Herbert index++; 1452ab7ac4ebSTom Herbert } 1453ab7ac4ebSTom Herbert 1454ab7ac4ebSTom Herbert list_add(&psock->psock_list, head); 1455ab7ac4ebSTom Herbert psock->index = index; 1456ab7ac4ebSTom Herbert 1457cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_attach); 1458ab7ac4ebSTom Herbert mux->psocks_cnt++; 1459ab7ac4ebSTom Herbert psock_now_avail(psock); 1460ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1461ab7ac4ebSTom Herbert 1462ab7ac4ebSTom Herbert /* Schedule RX work in case there are already bytes queued */ 14639b73896aSTom Herbert strp_check_rcv(&psock->strp); 1464ab7ac4ebSTom Herbert 14652cc683e8STom Herbert out: 14662cc683e8STom Herbert release_sock(csk); 14672cc683e8STom Herbert 14682cc683e8STom Herbert return err; 1469ab7ac4ebSTom Herbert } 1470ab7ac4ebSTom Herbert 1471ab7ac4ebSTom Herbert static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) 1472ab7ac4ebSTom Herbert { 1473ab7ac4ebSTom Herbert struct socket *csock; 1474ab7ac4ebSTom Herbert struct bpf_prog *prog; 1475ab7ac4ebSTom Herbert int err; 1476ab7ac4ebSTom Herbert 1477ab7ac4ebSTom Herbert csock = sockfd_lookup(info->fd, &err); 1478ab7ac4ebSTom Herbert if (!csock) 1479ab7ac4ebSTom Herbert return -ENOENT; 1480ab7ac4ebSTom Herbert 1481113214beSDaniel Borkmann prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER); 1482ab7ac4ebSTom Herbert if (IS_ERR(prog)) { 1483ab7ac4ebSTom Herbert err = PTR_ERR(prog); 1484ab7ac4ebSTom Herbert goto out; 1485ab7ac4ebSTom Herbert } 1486ab7ac4ebSTom Herbert 1487ab7ac4ebSTom Herbert err = kcm_attach(sock, csock, prog); 1488ab7ac4ebSTom Herbert if (err) { 1489ab7ac4ebSTom Herbert bpf_prog_put(prog); 1490ab7ac4ebSTom Herbert goto out; 1491ab7ac4ebSTom Herbert } 1492ab7ac4ebSTom Herbert 1493ab7ac4ebSTom Herbert /* Keep reference on file also */ 1494ab7ac4ebSTom Herbert 1495ab7ac4ebSTom Herbert return 0; 1496ab7ac4ebSTom Herbert out: 1497ab7ac4ebSTom Herbert fput(csock->file); 1498ab7ac4ebSTom Herbert return err; 1499ab7ac4ebSTom Herbert } 1500ab7ac4ebSTom Herbert 1501ab7ac4ebSTom Herbert static void kcm_unattach(struct kcm_psock *psock) 1502ab7ac4ebSTom Herbert { 1503ab7ac4ebSTom Herbert struct sock *csk = psock->sk; 1504ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 1505ab7ac4ebSTom Herbert 15061616b38fSTom Herbert lock_sock(csk); 15071616b38fSTom Herbert 1508ab7ac4ebSTom Herbert /* Stop getting callbacks from TCP socket. After this there should 1509ab7ac4ebSTom Herbert * be no way to reserve a kcm for this psock. 1510ab7ac4ebSTom Herbert */ 1511ab7ac4ebSTom Herbert write_lock_bh(&csk->sk_callback_lock); 1512ab7ac4ebSTom Herbert csk->sk_user_data = NULL; 1513ab7ac4ebSTom Herbert csk->sk_data_ready = psock->save_data_ready; 1514ab7ac4ebSTom Herbert csk->sk_write_space = psock->save_write_space; 1515ab7ac4ebSTom Herbert csk->sk_state_change = psock->save_state_change; 15169b73896aSTom Herbert strp_stop(&psock->strp); 1517ab7ac4ebSTom Herbert 1518ab7ac4ebSTom Herbert if (WARN_ON(psock->rx_kcm)) { 1519ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 15202cc683e8STom Herbert release_sock(csk); 1521ab7ac4ebSTom Herbert return; 1522ab7ac4ebSTom Herbert } 1523ab7ac4ebSTom Herbert 1524ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1525ab7ac4ebSTom Herbert 1526ab7ac4ebSTom Herbert /* Stop receiver activities. After this point psock should not be 1527ab7ac4ebSTom Herbert * able to get onto ready list either through callbacks or work. 1528ab7ac4ebSTom Herbert */ 1529ab7ac4ebSTom Herbert if (psock->ready_rx_msg) { 1530ab7ac4ebSTom Herbert list_del(&psock->psock_ready_list); 1531ab7ac4ebSTom Herbert kfree_skb(psock->ready_rx_msg); 1532ab7ac4ebSTom Herbert psock->ready_rx_msg = NULL; 1533cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.rx_ready_drops); 1534ab7ac4ebSTom Herbert } 1535ab7ac4ebSTom Herbert 1536ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1537ab7ac4ebSTom Herbert 1538ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 1539ab7ac4ebSTom Herbert 15401616b38fSTom Herbert /* Call strp_done without sock lock */ 15411616b38fSTom Herbert release_sock(csk); 15429b73896aSTom Herbert strp_done(&psock->strp); 15431616b38fSTom Herbert lock_sock(csk); 1544ab7ac4ebSTom Herbert 1545ab7ac4ebSTom Herbert bpf_prog_put(psock->bpf_prog); 1546ab7ac4ebSTom Herbert 1547ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1548ab7ac4ebSTom Herbert 1549cd6e111bSTom Herbert aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); 15509b73896aSTom Herbert save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); 1551cd6e111bSTom Herbert 1552cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_unattach); 1553cd6e111bSTom Herbert 1554ab7ac4ebSTom Herbert if (psock->tx_kcm) { 1555ab7ac4ebSTom Herbert /* psock was reserved. Just mark it finished and we will clean 1556ab7ac4ebSTom Herbert * up in the kcm paths, we need kcm lock which can not be 1557ab7ac4ebSTom Herbert * acquired here. 1558ab7ac4ebSTom Herbert */ 1559cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_unattach_rsvd); 1560ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1561ab7ac4ebSTom Herbert 1562ab7ac4ebSTom Herbert /* We are unattaching a socket that is reserved. Abort the 1563ab7ac4ebSTom Herbert * socket since we may be out of sync in sending on it. We need 1564ab7ac4ebSTom Herbert * to do this without the mux lock. 1565ab7ac4ebSTom Herbert */ 1566ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, EPIPE, false); 1567ab7ac4ebSTom Herbert 1568ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1569ab7ac4ebSTom Herbert if (!psock->tx_kcm) { 1570ab7ac4ebSTom Herbert /* psock now unreserved in window mux was unlocked */ 1571ab7ac4ebSTom Herbert goto no_reserved; 1572ab7ac4ebSTom Herbert } 1573ab7ac4ebSTom Herbert psock->done = 1; 1574ab7ac4ebSTom Herbert 1575ab7ac4ebSTom Herbert /* Commit done before queuing work to process it */ 1576ab7ac4ebSTom Herbert smp_mb(); 1577ab7ac4ebSTom Herbert 1578ab7ac4ebSTom Herbert /* Queue tx work to make sure psock->done is handled */ 1579ab7ac4ebSTom Herbert queue_work(kcm_wq, &psock->tx_kcm->tx_work); 1580ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1581ab7ac4ebSTom Herbert } else { 1582ab7ac4ebSTom Herbert no_reserved: 1583ab7ac4ebSTom Herbert if (!psock->tx_stopped) 1584ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 1585ab7ac4ebSTom Herbert list_del(&psock->psock_list); 1586ab7ac4ebSTom Herbert mux->psocks_cnt--; 1587ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1588ab7ac4ebSTom Herbert 1589ab7ac4ebSTom Herbert sock_put(csk); 1590ab7ac4ebSTom Herbert fput(csk->sk_socket->file); 1591ab7ac4ebSTom Herbert kmem_cache_free(kcm_psockp, psock); 1592ab7ac4ebSTom Herbert } 15931616b38fSTom Herbert 15941616b38fSTom Herbert release_sock(csk); 1595ab7ac4ebSTom Herbert } 1596ab7ac4ebSTom Herbert 1597ab7ac4ebSTom Herbert static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) 1598ab7ac4ebSTom Herbert { 1599ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1600ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1601ab7ac4ebSTom Herbert struct kcm_psock *psock; 1602ab7ac4ebSTom Herbert struct socket *csock; 1603ab7ac4ebSTom Herbert struct sock *csk; 1604ab7ac4ebSTom Herbert int err; 1605ab7ac4ebSTom Herbert 1606ab7ac4ebSTom Herbert csock = sockfd_lookup(info->fd, &err); 1607ab7ac4ebSTom Herbert if (!csock) 1608ab7ac4ebSTom Herbert return -ENOENT; 1609ab7ac4ebSTom Herbert 1610ab7ac4ebSTom Herbert csk = csock->sk; 1611ab7ac4ebSTom Herbert if (!csk) { 1612ab7ac4ebSTom Herbert err = -EINVAL; 1613ab7ac4ebSTom Herbert goto out; 1614ab7ac4ebSTom Herbert } 1615ab7ac4ebSTom Herbert 1616ab7ac4ebSTom Herbert err = -ENOENT; 1617ab7ac4ebSTom Herbert 1618ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1619ab7ac4ebSTom Herbert 1620ab7ac4ebSTom Herbert list_for_each_entry(psock, &mux->psocks, psock_list) { 1621ab7ac4ebSTom Herbert if (psock->sk != csk) 1622ab7ac4ebSTom Herbert continue; 1623ab7ac4ebSTom Herbert 1624ab7ac4ebSTom Herbert /* Found the matching psock */ 1625ab7ac4ebSTom Herbert 1626ab7ac4ebSTom Herbert if (psock->unattaching || WARN_ON(psock->done)) { 1627ab7ac4ebSTom Herbert err = -EALREADY; 1628ab7ac4ebSTom Herbert break; 1629ab7ac4ebSTom Herbert } 1630ab7ac4ebSTom Herbert 1631ab7ac4ebSTom Herbert psock->unattaching = 1; 1632ab7ac4ebSTom Herbert 1633ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1634ab7ac4ebSTom Herbert 16359b73896aSTom Herbert /* Lower socket lock should already be held */ 1636ab7ac4ebSTom Herbert kcm_unattach(psock); 1637ab7ac4ebSTom Herbert 1638ab7ac4ebSTom Herbert err = 0; 1639ab7ac4ebSTom Herbert goto out; 1640ab7ac4ebSTom Herbert } 1641ab7ac4ebSTom Herbert 1642ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1643ab7ac4ebSTom Herbert 1644ab7ac4ebSTom Herbert out: 1645ab7ac4ebSTom Herbert fput(csock->file); 1646ab7ac4ebSTom Herbert return err; 1647ab7ac4ebSTom Herbert } 1648ab7ac4ebSTom Herbert 1649ab7ac4ebSTom Herbert static struct proto kcm_proto = { 1650ab7ac4ebSTom Herbert .name = "KCM", 1651ab7ac4ebSTom Herbert .owner = THIS_MODULE, 1652ab7ac4ebSTom Herbert .obj_size = sizeof(struct kcm_sock), 1653ab7ac4ebSTom Herbert }; 1654ab7ac4ebSTom Herbert 1655ab7ac4ebSTom Herbert /* Clone a kcm socket. */ 1656a5739435SAl Viro static struct file *kcm_clone(struct socket *osock) 1657ab7ac4ebSTom Herbert { 1658ab7ac4ebSTom Herbert struct socket *newsock; 1659ab7ac4ebSTom Herbert struct sock *newsk; 1660ab7ac4ebSTom Herbert 1661ab7ac4ebSTom Herbert newsock = sock_alloc(); 1662ab7ac4ebSTom Herbert if (!newsock) 1663a5739435SAl Viro return ERR_PTR(-ENFILE); 1664ab7ac4ebSTom Herbert 1665ab7ac4ebSTom Herbert newsock->type = osock->type; 1666ab7ac4ebSTom Herbert newsock->ops = osock->ops; 1667ab7ac4ebSTom Herbert 1668ab7ac4ebSTom Herbert __module_get(newsock->ops->owner); 1669ab7ac4ebSTom Herbert 1670ab7ac4ebSTom Herbert newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, 1671eb7f54b9SKirill Tkhai &kcm_proto, false); 1672ab7ac4ebSTom Herbert if (!newsk) { 1673a5739435SAl Viro sock_release(newsock); 1674a5739435SAl Viro return ERR_PTR(-ENOMEM); 1675ab7ac4ebSTom Herbert } 1676ab7ac4ebSTom Herbert sock_init_data(newsock, newsk); 1677ab7ac4ebSTom Herbert init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); 1678ab7ac4ebSTom Herbert 16798e1611e2SAl Viro return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); 1680ab7ac4ebSTom Herbert } 1681ab7ac4ebSTom Herbert 1682ab7ac4ebSTom Herbert static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1683ab7ac4ebSTom Herbert { 1684ab7ac4ebSTom Herbert int err; 1685ab7ac4ebSTom Herbert 1686ab7ac4ebSTom Herbert switch (cmd) { 1687ab7ac4ebSTom Herbert case SIOCKCMATTACH: { 1688ab7ac4ebSTom Herbert struct kcm_attach info; 1689ab7ac4ebSTom Herbert 1690ab7ac4ebSTom Herbert if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 1691a80db69eSWANG Cong return -EFAULT; 1692ab7ac4ebSTom Herbert 1693ab7ac4ebSTom Herbert err = kcm_attach_ioctl(sock, &info); 1694ab7ac4ebSTom Herbert 1695ab7ac4ebSTom Herbert break; 1696ab7ac4ebSTom Herbert } 1697ab7ac4ebSTom Herbert case SIOCKCMUNATTACH: { 1698ab7ac4ebSTom Herbert struct kcm_unattach info; 1699ab7ac4ebSTom Herbert 1700ab7ac4ebSTom Herbert if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 1701a80db69eSWANG Cong return -EFAULT; 1702ab7ac4ebSTom Herbert 1703ab7ac4ebSTom Herbert err = kcm_unattach_ioctl(sock, &info); 1704ab7ac4ebSTom Herbert 1705ab7ac4ebSTom Herbert break; 1706ab7ac4ebSTom Herbert } 1707ab7ac4ebSTom Herbert case SIOCKCMCLONE: { 1708ab7ac4ebSTom Herbert struct kcm_clone info; 1709a5739435SAl Viro struct file *file; 1710ab7ac4ebSTom Herbert 1711a5739435SAl Viro info.fd = get_unused_fd_flags(0); 1712a5739435SAl Viro if (unlikely(info.fd < 0)) 1713a5739435SAl Viro return info.fd; 1714a5739435SAl Viro 1715a5739435SAl Viro file = kcm_clone(sock); 1716a5739435SAl Viro if (IS_ERR(file)) { 1717a5739435SAl Viro put_unused_fd(info.fd); 1718a5739435SAl Viro return PTR_ERR(file); 1719a5739435SAl Viro } 1720ab7ac4ebSTom Herbert if (copy_to_user((void __user *)arg, &info, 1721ab7ac4ebSTom Herbert sizeof(info))) { 1722a5739435SAl Viro put_unused_fd(info.fd); 1723a5739435SAl Viro fput(file); 1724a5739435SAl Viro return -EFAULT; 1725ab7ac4ebSTom Herbert } 1726a5739435SAl Viro fd_install(info.fd, file); 1727a5739435SAl Viro err = 0; 1728ab7ac4ebSTom Herbert break; 1729ab7ac4ebSTom Herbert } 1730ab7ac4ebSTom Herbert default: 1731ab7ac4ebSTom Herbert err = -ENOIOCTLCMD; 1732ab7ac4ebSTom Herbert break; 1733ab7ac4ebSTom Herbert } 1734ab7ac4ebSTom Herbert 1735ab7ac4ebSTom Herbert return err; 1736ab7ac4ebSTom Herbert } 1737ab7ac4ebSTom Herbert 1738ab7ac4ebSTom Herbert static void free_mux(struct rcu_head *rcu) 1739ab7ac4ebSTom Herbert { 1740ab7ac4ebSTom Herbert struct kcm_mux *mux = container_of(rcu, 1741ab7ac4ebSTom Herbert struct kcm_mux, rcu); 1742ab7ac4ebSTom Herbert 1743ab7ac4ebSTom Herbert kmem_cache_free(kcm_muxp, mux); 1744ab7ac4ebSTom Herbert } 1745ab7ac4ebSTom Herbert 1746ab7ac4ebSTom Herbert static void release_mux(struct kcm_mux *mux) 1747ab7ac4ebSTom Herbert { 1748ab7ac4ebSTom Herbert struct kcm_net *knet = mux->knet; 1749ab7ac4ebSTom Herbert struct kcm_psock *psock, *tmp_psock; 1750ab7ac4ebSTom Herbert 1751ab7ac4ebSTom Herbert /* Release psocks */ 1752ab7ac4ebSTom Herbert list_for_each_entry_safe(psock, tmp_psock, 1753ab7ac4ebSTom Herbert &mux->psocks, psock_list) { 17541616b38fSTom Herbert if (!WARN_ON(psock->unattaching)) 1755ab7ac4ebSTom Herbert kcm_unattach(psock); 1756ab7ac4ebSTom Herbert } 1757ab7ac4ebSTom Herbert 1758ab7ac4ebSTom Herbert if (WARN_ON(mux->psocks_cnt)) 1759ab7ac4ebSTom Herbert return; 1760ab7ac4ebSTom Herbert 1761ab7ac4ebSTom Herbert __skb_queue_purge(&mux->rx_hold_queue); 1762ab7ac4ebSTom Herbert 1763ab7ac4ebSTom Herbert mutex_lock(&knet->mutex); 1764cd6e111bSTom Herbert aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); 1765cd6e111bSTom Herbert aggregate_psock_stats(&mux->aggregate_psock_stats, 1766cd6e111bSTom Herbert &knet->aggregate_psock_stats); 17679b73896aSTom Herbert aggregate_strp_stats(&mux->aggregate_strp_stats, 17689b73896aSTom Herbert &knet->aggregate_strp_stats); 1769ab7ac4ebSTom Herbert list_del_rcu(&mux->kcm_mux_list); 1770ab7ac4ebSTom Herbert knet->count--; 1771ab7ac4ebSTom Herbert mutex_unlock(&knet->mutex); 1772ab7ac4ebSTom Herbert 1773ab7ac4ebSTom Herbert call_rcu(&mux->rcu, free_mux); 1774ab7ac4ebSTom Herbert } 1775ab7ac4ebSTom Herbert 1776ab7ac4ebSTom Herbert static void kcm_done(struct kcm_sock *kcm) 1777ab7ac4ebSTom Herbert { 1778ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1779ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 1780ab7ac4ebSTom Herbert int socks_cnt; 1781ab7ac4ebSTom Herbert 1782ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1783ab7ac4ebSTom Herbert if (kcm->rx_psock) { 1784ab7ac4ebSTom Herbert /* Cleanup in unreserve_rx_kcm */ 1785ab7ac4ebSTom Herbert WARN_ON(kcm->done); 1786ab7ac4ebSTom Herbert kcm->rx_disabled = 1; 1787ab7ac4ebSTom Herbert kcm->done = 1; 1788ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1789ab7ac4ebSTom Herbert return; 1790ab7ac4ebSTom Herbert } 1791ab7ac4ebSTom Herbert 1792ab7ac4ebSTom Herbert if (kcm->rx_wait) { 1793ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 1794ab7ac4ebSTom Herbert kcm->rx_wait = false; 1795ab7ac4ebSTom Herbert } 1796ab7ac4ebSTom Herbert /* Move any pending receive messages to other kcm sockets */ 1797ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &sk->sk_receive_queue); 1798ab7ac4ebSTom Herbert 1799ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1800ab7ac4ebSTom Herbert 1801ab7ac4ebSTom Herbert if (WARN_ON(sk_rmem_alloc_get(sk))) 1802ab7ac4ebSTom Herbert return; 1803ab7ac4ebSTom Herbert 1804ab7ac4ebSTom Herbert /* Detach from MUX */ 1805ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1806ab7ac4ebSTom Herbert 1807ab7ac4ebSTom Herbert list_del(&kcm->kcm_sock_list); 1808ab7ac4ebSTom Herbert mux->kcm_socks_cnt--; 1809ab7ac4ebSTom Herbert socks_cnt = mux->kcm_socks_cnt; 1810ab7ac4ebSTom Herbert 1811ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1812ab7ac4ebSTom Herbert 1813ab7ac4ebSTom Herbert if (!socks_cnt) { 1814ab7ac4ebSTom Herbert /* We are done with the mux now. */ 1815ab7ac4ebSTom Herbert release_mux(mux); 1816ab7ac4ebSTom Herbert } 1817ab7ac4ebSTom Herbert 1818ab7ac4ebSTom Herbert WARN_ON(kcm->rx_wait); 1819ab7ac4ebSTom Herbert 1820ab7ac4ebSTom Herbert sock_put(&kcm->sk); 1821ab7ac4ebSTom Herbert } 1822ab7ac4ebSTom Herbert 1823ab7ac4ebSTom Herbert /* Called by kcm_release to close a KCM socket. 1824ab7ac4ebSTom Herbert * If this is the last KCM socket on the MUX, destroy the MUX. 1825ab7ac4ebSTom Herbert */ 1826ab7ac4ebSTom Herbert static int kcm_release(struct socket *sock) 1827ab7ac4ebSTom Herbert { 1828ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 1829ab7ac4ebSTom Herbert struct kcm_sock *kcm; 1830ab7ac4ebSTom Herbert struct kcm_mux *mux; 1831ab7ac4ebSTom Herbert struct kcm_psock *psock; 1832ab7ac4ebSTom Herbert 1833ab7ac4ebSTom Herbert if (!sk) 1834ab7ac4ebSTom Herbert return 0; 1835ab7ac4ebSTom Herbert 1836ab7ac4ebSTom Herbert kcm = kcm_sk(sk); 1837ab7ac4ebSTom Herbert mux = kcm->mux; 1838ab7ac4ebSTom Herbert 1839ab7ac4ebSTom Herbert sock_orphan(sk); 1840ab7ac4ebSTom Herbert kfree_skb(kcm->seq_skb); 1841ab7ac4ebSTom Herbert 1842ab7ac4ebSTom Herbert lock_sock(sk); 1843ab7ac4ebSTom Herbert /* Purge queue under lock to avoid race condition with tx_work trying 1844ab7ac4ebSTom Herbert * to act when queue is nonempty. If tx_work runs after this point 1845ab7ac4ebSTom Herbert * it will just return. 1846ab7ac4ebSTom Herbert */ 1847ab7ac4ebSTom Herbert __skb_queue_purge(&sk->sk_write_queue); 18489b73896aSTom Herbert 18499b73896aSTom Herbert /* Set tx_stopped. This is checked when psock is bound to a kcm and we 18509b73896aSTom Herbert * get a writespace callback. This prevents further work being queued 18519b73896aSTom Herbert * from the callback (unbinding the psock occurs after canceling work. 18529b73896aSTom Herbert */ 18539b73896aSTom Herbert kcm->tx_stopped = 1; 18549b73896aSTom Herbert 1855ab7ac4ebSTom Herbert release_sock(sk); 1856ab7ac4ebSTom Herbert 1857ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1858ab7ac4ebSTom Herbert if (kcm->tx_wait) { 1859ab7ac4ebSTom Herbert /* Take of tx_wait list, after this point there should be no way 1860ab7ac4ebSTom Herbert * that a psock will be assigned to this kcm. 1861ab7ac4ebSTom Herbert */ 1862ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 1863ab7ac4ebSTom Herbert kcm->tx_wait = false; 1864ab7ac4ebSTom Herbert } 1865ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1866ab7ac4ebSTom Herbert 1867ab7ac4ebSTom Herbert /* Cancel work. After this point there should be no outside references 1868ab7ac4ebSTom Herbert * to the kcm socket. 1869ab7ac4ebSTom Herbert */ 1870ab7ac4ebSTom Herbert cancel_work_sync(&kcm->tx_work); 1871ab7ac4ebSTom Herbert 1872ab7ac4ebSTom Herbert lock_sock(sk); 1873ab7ac4ebSTom Herbert psock = kcm->tx_psock; 1874ab7ac4ebSTom Herbert if (psock) { 1875ab7ac4ebSTom Herbert /* A psock was reserved, so we need to kill it since it 1876ab7ac4ebSTom Herbert * may already have some bytes queued from a message. We 1877ab7ac4ebSTom Herbert * need to do this after removing kcm from tx_wait list. 1878ab7ac4ebSTom Herbert */ 1879ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, EPIPE, false); 1880ab7ac4ebSTom Herbert unreserve_psock(kcm); 1881ab7ac4ebSTom Herbert } 1882ab7ac4ebSTom Herbert release_sock(sk); 1883ab7ac4ebSTom Herbert 1884ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 1885ab7ac4ebSTom Herbert WARN_ON(kcm->tx_psock); 1886ab7ac4ebSTom Herbert 1887ab7ac4ebSTom Herbert sock->sk = NULL; 1888ab7ac4ebSTom Herbert 1889ab7ac4ebSTom Herbert kcm_done(kcm); 1890ab7ac4ebSTom Herbert 1891ab7ac4ebSTom Herbert return 0; 1892ab7ac4ebSTom Herbert } 1893ab7ac4ebSTom Herbert 189491687355STom Herbert static const struct proto_ops kcm_dgram_ops = { 1895ab7ac4ebSTom Herbert .family = PF_KCM, 1896ab7ac4ebSTom Herbert .owner = THIS_MODULE, 1897ab7ac4ebSTom Herbert .release = kcm_release, 1898ab7ac4ebSTom Herbert .bind = sock_no_bind, 1899ab7ac4ebSTom Herbert .connect = sock_no_connect, 1900ab7ac4ebSTom Herbert .socketpair = sock_no_socketpair, 1901ab7ac4ebSTom Herbert .accept = sock_no_accept, 1902ab7ac4ebSTom Herbert .getname = sock_no_getname, 1903a11e1d43SLinus Torvalds .poll = datagram_poll, 1904ab7ac4ebSTom Herbert .ioctl = kcm_ioctl, 1905ab7ac4ebSTom Herbert .listen = sock_no_listen, 1906ab7ac4ebSTom Herbert .shutdown = sock_no_shutdown, 1907ab7ac4ebSTom Herbert .setsockopt = kcm_setsockopt, 1908ab7ac4ebSTom Herbert .getsockopt = kcm_getsockopt, 1909ab7ac4ebSTom Herbert .sendmsg = kcm_sendmsg, 1910ab7ac4ebSTom Herbert .recvmsg = kcm_recvmsg, 1911ab7ac4ebSTom Herbert .mmap = sock_no_mmap, 1912f29698fcSTom Herbert .sendpage = kcm_sendpage, 1913ab7ac4ebSTom Herbert }; 1914ab7ac4ebSTom Herbert 191591687355STom Herbert static const struct proto_ops kcm_seqpacket_ops = { 191691687355STom Herbert .family = PF_KCM, 191791687355STom Herbert .owner = THIS_MODULE, 191891687355STom Herbert .release = kcm_release, 191991687355STom Herbert .bind = sock_no_bind, 192091687355STom Herbert .connect = sock_no_connect, 192191687355STom Herbert .socketpair = sock_no_socketpair, 192291687355STom Herbert .accept = sock_no_accept, 192391687355STom Herbert .getname = sock_no_getname, 1924a11e1d43SLinus Torvalds .poll = datagram_poll, 192591687355STom Herbert .ioctl = kcm_ioctl, 192691687355STom Herbert .listen = sock_no_listen, 192791687355STom Herbert .shutdown = sock_no_shutdown, 192891687355STom Herbert .setsockopt = kcm_setsockopt, 192991687355STom Herbert .getsockopt = kcm_getsockopt, 193091687355STom Herbert .sendmsg = kcm_sendmsg, 193191687355STom Herbert .recvmsg = kcm_recvmsg, 193291687355STom Herbert .mmap = sock_no_mmap, 1933f29698fcSTom Herbert .sendpage = kcm_sendpage, 193491687355STom Herbert .splice_read = kcm_splice_read, 193591687355STom Herbert }; 193691687355STom Herbert 1937ab7ac4ebSTom Herbert /* Create proto operation for kcm sockets */ 1938ab7ac4ebSTom Herbert static int kcm_create(struct net *net, struct socket *sock, 1939ab7ac4ebSTom Herbert int protocol, int kern) 1940ab7ac4ebSTom Herbert { 1941ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 1942ab7ac4ebSTom Herbert struct sock *sk; 1943ab7ac4ebSTom Herbert struct kcm_mux *mux; 1944ab7ac4ebSTom Herbert 1945ab7ac4ebSTom Herbert switch (sock->type) { 1946ab7ac4ebSTom Herbert case SOCK_DGRAM: 194791687355STom Herbert sock->ops = &kcm_dgram_ops; 194891687355STom Herbert break; 1949ab7ac4ebSTom Herbert case SOCK_SEQPACKET: 195091687355STom Herbert sock->ops = &kcm_seqpacket_ops; 1951ab7ac4ebSTom Herbert break; 1952ab7ac4ebSTom Herbert default: 1953ab7ac4ebSTom Herbert return -ESOCKTNOSUPPORT; 1954ab7ac4ebSTom Herbert } 1955ab7ac4ebSTom Herbert 1956ab7ac4ebSTom Herbert if (protocol != KCMPROTO_CONNECTED) 1957ab7ac4ebSTom Herbert return -EPROTONOSUPPORT; 1958ab7ac4ebSTom Herbert 1959ab7ac4ebSTom Herbert sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); 1960ab7ac4ebSTom Herbert if (!sk) 1961ab7ac4ebSTom Herbert return -ENOMEM; 1962ab7ac4ebSTom Herbert 1963ab7ac4ebSTom Herbert /* Allocate a kcm mux, shared between KCM sockets */ 1964ab7ac4ebSTom Herbert mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); 1965ab7ac4ebSTom Herbert if (!mux) { 1966ab7ac4ebSTom Herbert sk_free(sk); 1967ab7ac4ebSTom Herbert return -ENOMEM; 1968ab7ac4ebSTom Herbert } 1969ab7ac4ebSTom Herbert 1970ab7ac4ebSTom Herbert spin_lock_init(&mux->lock); 1971ab7ac4ebSTom Herbert spin_lock_init(&mux->rx_lock); 1972ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_socks); 1973ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_rx_waiters); 1974ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_tx_waiters); 1975ab7ac4ebSTom Herbert 1976ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks); 1977ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks_ready); 1978ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks_avail); 1979ab7ac4ebSTom Herbert 1980ab7ac4ebSTom Herbert mux->knet = knet; 1981ab7ac4ebSTom Herbert 1982ab7ac4ebSTom Herbert /* Add new MUX to list */ 1983ab7ac4ebSTom Herbert mutex_lock(&knet->mutex); 1984ab7ac4ebSTom Herbert list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); 1985ab7ac4ebSTom Herbert knet->count++; 1986ab7ac4ebSTom Herbert mutex_unlock(&knet->mutex); 1987ab7ac4ebSTom Herbert 1988ab7ac4ebSTom Herbert skb_queue_head_init(&mux->rx_hold_queue); 1989ab7ac4ebSTom Herbert 1990ab7ac4ebSTom Herbert /* Init KCM socket */ 1991ab7ac4ebSTom Herbert sock_init_data(sock, sk); 1992ab7ac4ebSTom Herbert init_kcm_sock(kcm_sk(sk), mux); 1993ab7ac4ebSTom Herbert 1994ab7ac4ebSTom Herbert return 0; 1995ab7ac4ebSTom Herbert } 1996ab7ac4ebSTom Herbert 1997173e7837Slinzhang static const struct net_proto_family kcm_family_ops = { 1998ab7ac4ebSTom Herbert .family = PF_KCM, 1999ab7ac4ebSTom Herbert .create = kcm_create, 2000ab7ac4ebSTom Herbert .owner = THIS_MODULE, 2001ab7ac4ebSTom Herbert }; 2002ab7ac4ebSTom Herbert 2003ab7ac4ebSTom Herbert static __net_init int kcm_init_net(struct net *net) 2004ab7ac4ebSTom Herbert { 2005ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 2006ab7ac4ebSTom Herbert 2007ab7ac4ebSTom Herbert INIT_LIST_HEAD_RCU(&knet->mux_list); 2008ab7ac4ebSTom Herbert mutex_init(&knet->mutex); 2009ab7ac4ebSTom Herbert 2010ab7ac4ebSTom Herbert return 0; 2011ab7ac4ebSTom Herbert } 2012ab7ac4ebSTom Herbert 2013ab7ac4ebSTom Herbert static __net_exit void kcm_exit_net(struct net *net) 2014ab7ac4ebSTom Herbert { 2015ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 2016ab7ac4ebSTom Herbert 2017ab7ac4ebSTom Herbert /* All KCM sockets should be closed at this point, which should mean 2018ab7ac4ebSTom Herbert * that all multiplexors and psocks have been destroyed. 2019ab7ac4ebSTom Herbert */ 2020ab7ac4ebSTom Herbert WARN_ON(!list_empty(&knet->mux_list)); 2021ab7ac4ebSTom Herbert } 2022ab7ac4ebSTom Herbert 2023ab7ac4ebSTom Herbert static struct pernet_operations kcm_net_ops = { 2024ab7ac4ebSTom Herbert .init = kcm_init_net, 2025ab7ac4ebSTom Herbert .exit = kcm_exit_net, 2026ab7ac4ebSTom Herbert .id = &kcm_net_id, 2027ab7ac4ebSTom Herbert .size = sizeof(struct kcm_net), 2028ab7ac4ebSTom Herbert }; 2029ab7ac4ebSTom Herbert 2030ab7ac4ebSTom Herbert static int __init kcm_init(void) 2031ab7ac4ebSTom Herbert { 2032ab7ac4ebSTom Herbert int err = -ENOMEM; 2033ab7ac4ebSTom Herbert 2034ab7ac4ebSTom Herbert kcm_muxp = kmem_cache_create("kcm_mux_cache", 2035ab7ac4ebSTom Herbert sizeof(struct kcm_mux), 0, 2036c2115240SYueHaibing SLAB_HWCACHE_ALIGN, NULL); 2037ab7ac4ebSTom Herbert if (!kcm_muxp) 2038ab7ac4ebSTom Herbert goto fail; 2039ab7ac4ebSTom Herbert 2040ab7ac4ebSTom Herbert kcm_psockp = kmem_cache_create("kcm_psock_cache", 2041ab7ac4ebSTom Herbert sizeof(struct kcm_psock), 0, 2042c2115240SYueHaibing SLAB_HWCACHE_ALIGN, NULL); 2043ab7ac4ebSTom Herbert if (!kcm_psockp) 2044ab7ac4ebSTom Herbert goto fail; 2045ab7ac4ebSTom Herbert 2046ab7ac4ebSTom Herbert kcm_wq = create_singlethread_workqueue("kkcmd"); 2047ab7ac4ebSTom Herbert if (!kcm_wq) 2048ab7ac4ebSTom Herbert goto fail; 2049ab7ac4ebSTom Herbert 2050ab7ac4ebSTom Herbert err = proto_register(&kcm_proto, 1); 2051ab7ac4ebSTom Herbert if (err) 2052ab7ac4ebSTom Herbert goto fail; 2053ab7ac4ebSTom Herbert 2054ab7ac4ebSTom Herbert err = register_pernet_device(&kcm_net_ops); 2055ab7ac4ebSTom Herbert if (err) 2056ab7ac4ebSTom Herbert goto net_ops_fail; 2057ab7ac4ebSTom Herbert 20583c446e6fSJiri Slaby err = sock_register(&kcm_family_ops); 20593c446e6fSJiri Slaby if (err) 20603c446e6fSJiri Slaby goto sock_register_fail; 20613c446e6fSJiri Slaby 2062cd6e111bSTom Herbert err = kcm_proc_init(); 2063cd6e111bSTom Herbert if (err) 2064cd6e111bSTom Herbert goto proc_init_fail; 2065cd6e111bSTom Herbert 2066ab7ac4ebSTom Herbert return 0; 2067ab7ac4ebSTom Herbert 2068cd6e111bSTom Herbert proc_init_fail: 2069ab7ac4ebSTom Herbert sock_unregister(PF_KCM); 2070ab7ac4ebSTom Herbert 2071ab7ac4ebSTom Herbert sock_register_fail: 20723c446e6fSJiri Slaby unregister_pernet_device(&kcm_net_ops); 20733c446e6fSJiri Slaby 20743c446e6fSJiri Slaby net_ops_fail: 2075ab7ac4ebSTom Herbert proto_unregister(&kcm_proto); 2076ab7ac4ebSTom Herbert 2077ab7ac4ebSTom Herbert fail: 2078ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_muxp); 2079ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_psockp); 2080ab7ac4ebSTom Herbert 2081ab7ac4ebSTom Herbert if (kcm_wq) 2082ab7ac4ebSTom Herbert destroy_workqueue(kcm_wq); 2083ab7ac4ebSTom Herbert 2084ab7ac4ebSTom Herbert return err; 2085ab7ac4ebSTom Herbert } 2086ab7ac4ebSTom Herbert 2087ab7ac4ebSTom Herbert static void __exit kcm_exit(void) 2088ab7ac4ebSTom Herbert { 2089cd6e111bSTom Herbert kcm_proc_exit(); 2090ab7ac4ebSTom Herbert sock_unregister(PF_KCM); 20913c446e6fSJiri Slaby unregister_pernet_device(&kcm_net_ops); 2092ab7ac4ebSTom Herbert proto_unregister(&kcm_proto); 2093ab7ac4ebSTom Herbert destroy_workqueue(kcm_wq); 2094ab7ac4ebSTom Herbert 2095ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_muxp); 2096ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_psockp); 2097ab7ac4ebSTom Herbert } 2098ab7ac4ebSTom Herbert 2099ab7ac4ebSTom Herbert module_init(kcm_init); 2100ab7ac4ebSTom Herbert module_exit(kcm_exit); 2101ab7ac4ebSTom Herbert 2102ab7ac4ebSTom Herbert MODULE_LICENSE("GPL"); 2103ab7ac4ebSTom Herbert MODULE_ALIAS_NETPROTO(PF_KCM); 2104