19b73896aSTom Herbert /* 29b73896aSTom Herbert * Kernel Connection Multiplexor 39b73896aSTom Herbert * 49b73896aSTom Herbert * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> 59b73896aSTom Herbert * 69b73896aSTom Herbert * This program is free software; you can redistribute it and/or modify 79b73896aSTom Herbert * it under the terms of the GNU General Public License version 2 89b73896aSTom Herbert * as published by the Free Software Foundation. 99b73896aSTom Herbert */ 109b73896aSTom Herbert 11ab7ac4ebSTom Herbert #include <linux/bpf.h> 12ab7ac4ebSTom Herbert #include <linux/errno.h> 13ab7ac4ebSTom Herbert #include <linux/errqueue.h> 14ab7ac4ebSTom Herbert #include <linux/file.h> 15ab7ac4ebSTom Herbert #include <linux/in.h> 16ab7ac4ebSTom Herbert #include <linux/kernel.h> 17ab7ac4ebSTom Herbert #include <linux/module.h> 18ab7ac4ebSTom Herbert #include <linux/net.h> 19ab7ac4ebSTom Herbert #include <linux/netdevice.h> 20ab7ac4ebSTom Herbert #include <linux/poll.h> 21ab7ac4ebSTom Herbert #include <linux/rculist.h> 22ab7ac4ebSTom Herbert #include <linux/skbuff.h> 23ab7ac4ebSTom Herbert #include <linux/socket.h> 24ab7ac4ebSTom Herbert #include <linux/uaccess.h> 25ab7ac4ebSTom Herbert #include <linux/workqueue.h> 26c0338affSWANG Cong #include <linux/syscalls.h> 27174cd4b1SIngo Molnar #include <linux/sched/signal.h> 28174cd4b1SIngo Molnar 29ab7ac4ebSTom Herbert #include <net/kcm.h> 30ab7ac4ebSTom Herbert #include <net/netns/generic.h> 31ab7ac4ebSTom Herbert #include <net/sock.h> 32ab7ac4ebSTom Herbert #include <uapi/linux/kcm.h> 33ab7ac4ebSTom Herbert 34ab7ac4ebSTom Herbert unsigned int kcm_net_id; 35ab7ac4ebSTom Herbert 36ab7ac4ebSTom Herbert static struct kmem_cache *kcm_psockp __read_mostly; 37ab7ac4ebSTom Herbert static struct kmem_cache *kcm_muxp __read_mostly; 38ab7ac4ebSTom Herbert static struct workqueue_struct *kcm_wq; 39ab7ac4ebSTom Herbert 40ab7ac4ebSTom Herbert static inline struct kcm_sock *kcm_sk(const struct sock *sk) 41ab7ac4ebSTom Herbert { 42ab7ac4ebSTom Herbert return (struct kcm_sock *)sk; 43ab7ac4ebSTom Herbert } 44ab7ac4ebSTom Herbert 45ab7ac4ebSTom Herbert static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) 46ab7ac4ebSTom Herbert { 47ab7ac4ebSTom Herbert return (struct kcm_tx_msg *)skb->cb; 48ab7ac4ebSTom Herbert } 49ab7ac4ebSTom Herbert 50ab7ac4ebSTom Herbert static void report_csk_error(struct sock *csk, int err) 51ab7ac4ebSTom Herbert { 52ab7ac4ebSTom Herbert csk->sk_err = EPIPE; 53ab7ac4ebSTom Herbert csk->sk_error_report(csk); 54ab7ac4ebSTom Herbert } 55ab7ac4ebSTom Herbert 56ab7ac4ebSTom Herbert static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, 57ab7ac4ebSTom Herbert bool wakeup_kcm) 58ab7ac4ebSTom Herbert { 59ab7ac4ebSTom Herbert struct sock *csk = psock->sk; 60ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 61ab7ac4ebSTom Herbert 62ab7ac4ebSTom Herbert /* Unrecoverable error in transmit */ 63ab7ac4ebSTom Herbert 64ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 65ab7ac4ebSTom Herbert 66ab7ac4ebSTom Herbert if (psock->tx_stopped) { 67ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 68ab7ac4ebSTom Herbert return; 69ab7ac4ebSTom Herbert } 70ab7ac4ebSTom Herbert 71ab7ac4ebSTom Herbert psock->tx_stopped = 1; 72cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.tx_aborts); 73ab7ac4ebSTom Herbert 74ab7ac4ebSTom Herbert if (!psock->tx_kcm) { 75ab7ac4ebSTom Herbert /* Take off psocks_avail list */ 76ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 77ab7ac4ebSTom Herbert } else if (wakeup_kcm) { 78ab7ac4ebSTom Herbert /* In this case psock is being aborted while outside of 79ab7ac4ebSTom Herbert * write_msgs and psock is reserved. Schedule tx_work 80ab7ac4ebSTom Herbert * to handle the failure there. Need to commit tx_stopped 81ab7ac4ebSTom Herbert * before queuing work. 82ab7ac4ebSTom Herbert */ 83ab7ac4ebSTom Herbert smp_mb(); 84ab7ac4ebSTom Herbert 85ab7ac4ebSTom Herbert queue_work(kcm_wq, &psock->tx_kcm->tx_work); 86ab7ac4ebSTom Herbert } 87ab7ac4ebSTom Herbert 88ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 89ab7ac4ebSTom Herbert 90ab7ac4ebSTom Herbert /* Report error on lower socket */ 91ab7ac4ebSTom Herbert report_csk_error(csk, err); 92ab7ac4ebSTom Herbert } 93ab7ac4ebSTom Herbert 94cd6e111bSTom Herbert /* RX mux lock held. */ 95cd6e111bSTom Herbert static void kcm_update_rx_mux_stats(struct kcm_mux *mux, 96cd6e111bSTom Herbert struct kcm_psock *psock) 97cd6e111bSTom Herbert { 989b73896aSTom Herbert STRP_STATS_ADD(mux->stats.rx_bytes, 999b73896aSTom Herbert psock->strp.stats.rx_bytes - 1009b73896aSTom Herbert psock->saved_rx_bytes); 101cd6e111bSTom Herbert mux->stats.rx_msgs += 1029b73896aSTom Herbert psock->strp.stats.rx_msgs - psock->saved_rx_msgs; 1039b73896aSTom Herbert psock->saved_rx_msgs = psock->strp.stats.rx_msgs; 1049b73896aSTom Herbert psock->saved_rx_bytes = psock->strp.stats.rx_bytes; 105cd6e111bSTom Herbert } 106cd6e111bSTom Herbert 107cd6e111bSTom Herbert static void kcm_update_tx_mux_stats(struct kcm_mux *mux, 108cd6e111bSTom Herbert struct kcm_psock *psock) 109cd6e111bSTom Herbert { 110cd6e111bSTom Herbert KCM_STATS_ADD(mux->stats.tx_bytes, 111cd6e111bSTom Herbert psock->stats.tx_bytes - psock->saved_tx_bytes); 112cd6e111bSTom Herbert mux->stats.tx_msgs += 113cd6e111bSTom Herbert psock->stats.tx_msgs - psock->saved_tx_msgs; 114cd6e111bSTom Herbert psock->saved_tx_msgs = psock->stats.tx_msgs; 115cd6e111bSTom Herbert psock->saved_tx_bytes = psock->stats.tx_bytes; 116cd6e111bSTom Herbert } 117cd6e111bSTom Herbert 118ab7ac4ebSTom Herbert static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); 119ab7ac4ebSTom Herbert 120ab7ac4ebSTom Herbert /* KCM is ready to receive messages on its queue-- either the KCM is new or 121ab7ac4ebSTom Herbert * has become unblocked after being blocked on full socket buffer. Queue any 122ab7ac4ebSTom Herbert * pending ready messages on a psock. RX mux lock held. 123ab7ac4ebSTom Herbert */ 124ab7ac4ebSTom Herbert static void kcm_rcv_ready(struct kcm_sock *kcm) 125ab7ac4ebSTom Herbert { 126ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 127ab7ac4ebSTom Herbert struct kcm_psock *psock; 128ab7ac4ebSTom Herbert struct sk_buff *skb; 129ab7ac4ebSTom Herbert 130ab7ac4ebSTom Herbert if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) 131ab7ac4ebSTom Herbert return; 132ab7ac4ebSTom Herbert 133ab7ac4ebSTom Herbert while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { 134ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 135ab7ac4ebSTom Herbert /* Assuming buffer limit has been reached */ 136ab7ac4ebSTom Herbert skb_queue_head(&mux->rx_hold_queue, skb); 137ab7ac4ebSTom Herbert WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); 138ab7ac4ebSTom Herbert return; 139ab7ac4ebSTom Herbert } 140ab7ac4ebSTom Herbert } 141ab7ac4ebSTom Herbert 142ab7ac4ebSTom Herbert while (!list_empty(&mux->psocks_ready)) { 143ab7ac4ebSTom Herbert psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, 144ab7ac4ebSTom Herbert psock_ready_list); 145ab7ac4ebSTom Herbert 146ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { 147ab7ac4ebSTom Herbert /* Assuming buffer limit has been reached */ 148ab7ac4ebSTom Herbert WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); 149ab7ac4ebSTom Herbert return; 150ab7ac4ebSTom Herbert } 151ab7ac4ebSTom Herbert 152ab7ac4ebSTom Herbert /* Consumed the ready message on the psock. Schedule rx_work to 153ab7ac4ebSTom Herbert * get more messages. 154ab7ac4ebSTom Herbert */ 155ab7ac4ebSTom Herbert list_del(&psock->psock_ready_list); 156ab7ac4ebSTom Herbert psock->ready_rx_msg = NULL; 157ab7ac4ebSTom Herbert /* Commit clearing of ready_rx_msg for queuing work */ 158ab7ac4ebSTom Herbert smp_mb(); 159ab7ac4ebSTom Herbert 1609b73896aSTom Herbert strp_unpause(&psock->strp); 1619b73896aSTom Herbert strp_check_rcv(&psock->strp); 162ab7ac4ebSTom Herbert } 163ab7ac4ebSTom Herbert 164ab7ac4ebSTom Herbert /* Buffer limit is okay now, add to ready list */ 165ab7ac4ebSTom Herbert list_add_tail(&kcm->wait_rx_list, 166ab7ac4ebSTom Herbert &kcm->mux->kcm_rx_waiters); 167ab7ac4ebSTom Herbert kcm->rx_wait = true; 168ab7ac4ebSTom Herbert } 169ab7ac4ebSTom Herbert 170ab7ac4ebSTom Herbert static void kcm_rfree(struct sk_buff *skb) 171ab7ac4ebSTom Herbert { 172ab7ac4ebSTom Herbert struct sock *sk = skb->sk; 173ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 174ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 175ab7ac4ebSTom Herbert unsigned int len = skb->truesize; 176ab7ac4ebSTom Herbert 177ab7ac4ebSTom Herbert sk_mem_uncharge(sk, len); 178ab7ac4ebSTom Herbert atomic_sub(len, &sk->sk_rmem_alloc); 179ab7ac4ebSTom Herbert 180ab7ac4ebSTom Herbert /* For reading rx_wait and rx_psock without holding lock */ 181ab7ac4ebSTom Herbert smp_mb__after_atomic(); 182ab7ac4ebSTom Herbert 183ab7ac4ebSTom Herbert if (!kcm->rx_wait && !kcm->rx_psock && 184ab7ac4ebSTom Herbert sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { 185ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 186ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 187ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 188ab7ac4ebSTom Herbert } 189ab7ac4ebSTom Herbert } 190ab7ac4ebSTom Herbert 191ab7ac4ebSTom Herbert static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 192ab7ac4ebSTom Herbert { 193ab7ac4ebSTom Herbert struct sk_buff_head *list = &sk->sk_receive_queue; 194ab7ac4ebSTom Herbert 195ab7ac4ebSTom Herbert if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 196ab7ac4ebSTom Herbert return -ENOMEM; 197ab7ac4ebSTom Herbert 198ab7ac4ebSTom Herbert if (!sk_rmem_schedule(sk, skb, skb->truesize)) 199ab7ac4ebSTom Herbert return -ENOBUFS; 200ab7ac4ebSTom Herbert 201ab7ac4ebSTom Herbert skb->dev = NULL; 202ab7ac4ebSTom Herbert 203ab7ac4ebSTom Herbert skb_orphan(skb); 204ab7ac4ebSTom Herbert skb->sk = sk; 205ab7ac4ebSTom Herbert skb->destructor = kcm_rfree; 206ab7ac4ebSTom Herbert atomic_add(skb->truesize, &sk->sk_rmem_alloc); 207ab7ac4ebSTom Herbert sk_mem_charge(sk, skb->truesize); 208ab7ac4ebSTom Herbert 209ab7ac4ebSTom Herbert skb_queue_tail(list, skb); 210ab7ac4ebSTom Herbert 211ab7ac4ebSTom Herbert if (!sock_flag(sk, SOCK_DEAD)) 212ab7ac4ebSTom Herbert sk->sk_data_ready(sk); 213ab7ac4ebSTom Herbert 214ab7ac4ebSTom Herbert return 0; 215ab7ac4ebSTom Herbert } 216ab7ac4ebSTom Herbert 217ab7ac4ebSTom Herbert /* Requeue received messages for a kcm socket to other kcm sockets. This is 218ab7ac4ebSTom Herbert * called with a kcm socket is receive disabled. 219ab7ac4ebSTom Herbert * RX mux lock held. 220ab7ac4ebSTom Herbert */ 221ab7ac4ebSTom Herbert static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) 222ab7ac4ebSTom Herbert { 223ab7ac4ebSTom Herbert struct sk_buff *skb; 224ab7ac4ebSTom Herbert struct kcm_sock *kcm; 225ab7ac4ebSTom Herbert 226ab7ac4ebSTom Herbert while ((skb = __skb_dequeue(head))) { 227ab7ac4ebSTom Herbert /* Reset destructor to avoid calling kcm_rcv_ready */ 228ab7ac4ebSTom Herbert skb->destructor = sock_rfree; 229ab7ac4ebSTom Herbert skb_orphan(skb); 230ab7ac4ebSTom Herbert try_again: 231ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_rx_waiters)) { 232ab7ac4ebSTom Herbert skb_queue_tail(&mux->rx_hold_queue, skb); 233ab7ac4ebSTom Herbert continue; 234ab7ac4ebSTom Herbert } 235ab7ac4ebSTom Herbert 236ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_rx_waiters, 237ab7ac4ebSTom Herbert struct kcm_sock, wait_rx_list); 238ab7ac4ebSTom Herbert 239ab7ac4ebSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 240ab7ac4ebSTom Herbert /* Should mean socket buffer full */ 241ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 242ab7ac4ebSTom Herbert kcm->rx_wait = false; 243ab7ac4ebSTom Herbert 244ab7ac4ebSTom Herbert /* Commit rx_wait to read in kcm_free */ 245ab7ac4ebSTom Herbert smp_wmb(); 246ab7ac4ebSTom Herbert 247ab7ac4ebSTom Herbert goto try_again; 248ab7ac4ebSTom Herbert } 249ab7ac4ebSTom Herbert } 250ab7ac4ebSTom Herbert } 251ab7ac4ebSTom Herbert 252ab7ac4ebSTom Herbert /* Lower sock lock held */ 253ab7ac4ebSTom Herbert static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, 254ab7ac4ebSTom Herbert struct sk_buff *head) 255ab7ac4ebSTom Herbert { 256ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 257ab7ac4ebSTom Herbert struct kcm_sock *kcm; 258ab7ac4ebSTom Herbert 259ab7ac4ebSTom Herbert WARN_ON(psock->ready_rx_msg); 260ab7ac4ebSTom Herbert 261ab7ac4ebSTom Herbert if (psock->rx_kcm) 262ab7ac4ebSTom Herbert return psock->rx_kcm; 263ab7ac4ebSTom Herbert 264ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 265ab7ac4ebSTom Herbert 266ab7ac4ebSTom Herbert if (psock->rx_kcm) { 267ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 268ab7ac4ebSTom Herbert return psock->rx_kcm; 269ab7ac4ebSTom Herbert } 270ab7ac4ebSTom Herbert 271cd6e111bSTom Herbert kcm_update_rx_mux_stats(mux, psock); 272cd6e111bSTom Herbert 273ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_rx_waiters)) { 274ab7ac4ebSTom Herbert psock->ready_rx_msg = head; 2759b73896aSTom Herbert strp_pause(&psock->strp); 276ab7ac4ebSTom Herbert list_add_tail(&psock->psock_ready_list, 277ab7ac4ebSTom Herbert &mux->psocks_ready); 278ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 279ab7ac4ebSTom Herbert return NULL; 280ab7ac4ebSTom Herbert } 281ab7ac4ebSTom Herbert 282ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_rx_waiters, 283ab7ac4ebSTom Herbert struct kcm_sock, wait_rx_list); 284ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 285ab7ac4ebSTom Herbert kcm->rx_wait = false; 286ab7ac4ebSTom Herbert 287ab7ac4ebSTom Herbert psock->rx_kcm = kcm; 288ab7ac4ebSTom Herbert kcm->rx_psock = psock; 289ab7ac4ebSTom Herbert 290ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 291ab7ac4ebSTom Herbert 292ab7ac4ebSTom Herbert return kcm; 293ab7ac4ebSTom Herbert } 294ab7ac4ebSTom Herbert 295ab7ac4ebSTom Herbert static void kcm_done(struct kcm_sock *kcm); 296ab7ac4ebSTom Herbert 297ab7ac4ebSTom Herbert static void kcm_done_work(struct work_struct *w) 298ab7ac4ebSTom Herbert { 299ab7ac4ebSTom Herbert kcm_done(container_of(w, struct kcm_sock, done_work)); 300ab7ac4ebSTom Herbert } 301ab7ac4ebSTom Herbert 302ab7ac4ebSTom Herbert /* Lower sock held */ 303ab7ac4ebSTom Herbert static void unreserve_rx_kcm(struct kcm_psock *psock, 304ab7ac4ebSTom Herbert bool rcv_ready) 305ab7ac4ebSTom Herbert { 306ab7ac4ebSTom Herbert struct kcm_sock *kcm = psock->rx_kcm; 307ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 308ab7ac4ebSTom Herbert 309ab7ac4ebSTom Herbert if (!kcm) 310ab7ac4ebSTom Herbert return; 311ab7ac4ebSTom Herbert 312ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 313ab7ac4ebSTom Herbert 314ab7ac4ebSTom Herbert psock->rx_kcm = NULL; 315ab7ac4ebSTom Herbert kcm->rx_psock = NULL; 316ab7ac4ebSTom Herbert 317ab7ac4ebSTom Herbert /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with 318ab7ac4ebSTom Herbert * kcm_rfree 319ab7ac4ebSTom Herbert */ 320ab7ac4ebSTom Herbert smp_mb(); 321ab7ac4ebSTom Herbert 322ab7ac4ebSTom Herbert if (unlikely(kcm->done)) { 323ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 324ab7ac4ebSTom Herbert 325ab7ac4ebSTom Herbert /* Need to run kcm_done in a task since we need to qcquire 326ab7ac4ebSTom Herbert * callback locks which may already be held here. 327ab7ac4ebSTom Herbert */ 328ab7ac4ebSTom Herbert INIT_WORK(&kcm->done_work, kcm_done_work); 329ab7ac4ebSTom Herbert schedule_work(&kcm->done_work); 330ab7ac4ebSTom Herbert return; 331ab7ac4ebSTom Herbert } 332ab7ac4ebSTom Herbert 333ab7ac4ebSTom Herbert if (unlikely(kcm->rx_disabled)) { 334ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); 335ab7ac4ebSTom Herbert } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { 336ab7ac4ebSTom Herbert /* Check for degenerative race with rx_wait that all 337ab7ac4ebSTom Herbert * data was dequeued (accounted for in kcm_rfree). 338ab7ac4ebSTom Herbert */ 339ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 340ab7ac4ebSTom Herbert } 341ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 342ab7ac4ebSTom Herbert } 343ab7ac4ebSTom Herbert 344ab7ac4ebSTom Herbert /* Lower sock lock held */ 34596a59083STom Herbert static void psock_data_ready(struct sock *sk) 346ab7ac4ebSTom Herbert { 347ab7ac4ebSTom Herbert struct kcm_psock *psock; 348ab7ac4ebSTom Herbert 349ab7ac4ebSTom Herbert read_lock_bh(&sk->sk_callback_lock); 350ab7ac4ebSTom Herbert 351ab7ac4ebSTom Herbert psock = (struct kcm_psock *)sk->sk_user_data; 3529b73896aSTom Herbert if (likely(psock)) 35396a59083STom Herbert strp_data_ready(&psock->strp); 354ab7ac4ebSTom Herbert 355ab7ac4ebSTom Herbert read_unlock_bh(&sk->sk_callback_lock); 356ab7ac4ebSTom Herbert } 357ab7ac4ebSTom Herbert 3589b73896aSTom Herbert /* Called with lower sock held */ 3599b73896aSTom Herbert static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) 360ab7ac4ebSTom Herbert { 3619b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3629b73896aSTom Herbert struct kcm_sock *kcm; 363ab7ac4ebSTom Herbert 3649b73896aSTom Herbert try_queue: 3659b73896aSTom Herbert kcm = reserve_rx_kcm(psock, skb); 3669b73896aSTom Herbert if (!kcm) { 3679b73896aSTom Herbert /* Unable to reserve a KCM, message is held in psock and strp 3689b73896aSTom Herbert * is paused. 369ab7ac4ebSTom Herbert */ 3709b73896aSTom Herbert return; 371ab7ac4ebSTom Herbert } 372ab7ac4ebSTom Herbert 3739b73896aSTom Herbert if (kcm_queue_rcv_skb(&kcm->sk, skb)) { 3749b73896aSTom Herbert /* Should mean socket buffer full */ 3759b73896aSTom Herbert unreserve_rx_kcm(psock, false); 3769b73896aSTom Herbert goto try_queue; 3779b73896aSTom Herbert } 378ab7ac4ebSTom Herbert } 379ab7ac4ebSTom Herbert 3809b73896aSTom Herbert static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) 381ab7ac4ebSTom Herbert { 3829b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3839b73896aSTom Herbert struct bpf_prog *prog = psock->bpf_prog; 3849b73896aSTom Herbert 3859b73896aSTom Herbert return (*prog->bpf_func)(skb, prog->insnsi); 3869b73896aSTom Herbert } 3879b73896aSTom Herbert 3889b73896aSTom Herbert static int kcm_read_sock_done(struct strparser *strp, int err) 3899b73896aSTom Herbert { 3909b73896aSTom Herbert struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); 3919b73896aSTom Herbert 3929b73896aSTom Herbert unreserve_rx_kcm(psock, true); 3939b73896aSTom Herbert 3949b73896aSTom Herbert return err; 395ab7ac4ebSTom Herbert } 396ab7ac4ebSTom Herbert 39796a59083STom Herbert static void psock_state_change(struct sock *sk) 398ab7ac4ebSTom Herbert { 399ab7ac4ebSTom Herbert /* TCP only does a POLLIN for a half close. Do a POLLHUP here 400ab7ac4ebSTom Herbert * since application will normally not poll with POLLIN 401ab7ac4ebSTom Herbert * on the TCP sockets. 402ab7ac4ebSTom Herbert */ 403ab7ac4ebSTom Herbert 404ab7ac4ebSTom Herbert report_csk_error(sk, EPIPE); 405ab7ac4ebSTom Herbert } 406ab7ac4ebSTom Herbert 40796a59083STom Herbert static void psock_write_space(struct sock *sk) 408ab7ac4ebSTom Herbert { 409ab7ac4ebSTom Herbert struct kcm_psock *psock; 410ab7ac4ebSTom Herbert struct kcm_mux *mux; 411ab7ac4ebSTom Herbert struct kcm_sock *kcm; 412ab7ac4ebSTom Herbert 413ab7ac4ebSTom Herbert read_lock_bh(&sk->sk_callback_lock); 414ab7ac4ebSTom Herbert 415ab7ac4ebSTom Herbert psock = (struct kcm_psock *)sk->sk_user_data; 416ab7ac4ebSTom Herbert if (unlikely(!psock)) 417ab7ac4ebSTom Herbert goto out; 418ab7ac4ebSTom Herbert mux = psock->mux; 419ab7ac4ebSTom Herbert 420ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 421ab7ac4ebSTom Herbert 422ab7ac4ebSTom Herbert /* Check if the socket is reserved so someone is waiting for sending. */ 423ab7ac4ebSTom Herbert kcm = psock->tx_kcm; 4249b73896aSTom Herbert if (kcm && !unlikely(kcm->tx_stopped)) 425ab7ac4ebSTom Herbert queue_work(kcm_wq, &kcm->tx_work); 426ab7ac4ebSTom Herbert 427ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 428ab7ac4ebSTom Herbert out: 429ab7ac4ebSTom Herbert read_unlock_bh(&sk->sk_callback_lock); 430ab7ac4ebSTom Herbert } 431ab7ac4ebSTom Herbert 432ab7ac4ebSTom Herbert static void unreserve_psock(struct kcm_sock *kcm); 433ab7ac4ebSTom Herbert 434ab7ac4ebSTom Herbert /* kcm sock is locked. */ 435ab7ac4ebSTom Herbert static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) 436ab7ac4ebSTom Herbert { 437ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 438ab7ac4ebSTom Herbert struct kcm_psock *psock; 439ab7ac4ebSTom Herbert 440ab7ac4ebSTom Herbert psock = kcm->tx_psock; 441ab7ac4ebSTom Herbert 442ab7ac4ebSTom Herbert smp_rmb(); /* Must read tx_psock before tx_wait */ 443ab7ac4ebSTom Herbert 444ab7ac4ebSTom Herbert if (psock) { 445ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 446ab7ac4ebSTom Herbert if (unlikely(psock->tx_stopped)) 447ab7ac4ebSTom Herbert unreserve_psock(kcm); 448ab7ac4ebSTom Herbert else 449ab7ac4ebSTom Herbert return kcm->tx_psock; 450ab7ac4ebSTom Herbert } 451ab7ac4ebSTom Herbert 452ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 453ab7ac4ebSTom Herbert 454ab7ac4ebSTom Herbert /* Check again under lock to see if psock was reserved for this 455ab7ac4ebSTom Herbert * psock via psock_unreserve. 456ab7ac4ebSTom Herbert */ 457ab7ac4ebSTom Herbert psock = kcm->tx_psock; 458ab7ac4ebSTom Herbert if (unlikely(psock)) { 459ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 460ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 461ab7ac4ebSTom Herbert return kcm->tx_psock; 462ab7ac4ebSTom Herbert } 463ab7ac4ebSTom Herbert 464ab7ac4ebSTom Herbert if (!list_empty(&mux->psocks_avail)) { 465ab7ac4ebSTom Herbert psock = list_first_entry(&mux->psocks_avail, 466ab7ac4ebSTom Herbert struct kcm_psock, 467ab7ac4ebSTom Herbert psock_avail_list); 468ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 469ab7ac4ebSTom Herbert if (kcm->tx_wait) { 470ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 471ab7ac4ebSTom Herbert kcm->tx_wait = false; 472ab7ac4ebSTom Herbert } 473ab7ac4ebSTom Herbert kcm->tx_psock = psock; 474ab7ac4ebSTom Herbert psock->tx_kcm = kcm; 475cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.reserved); 476ab7ac4ebSTom Herbert } else if (!kcm->tx_wait) { 477ab7ac4ebSTom Herbert list_add_tail(&kcm->wait_psock_list, 478ab7ac4ebSTom Herbert &mux->kcm_tx_waiters); 479ab7ac4ebSTom Herbert kcm->tx_wait = true; 480ab7ac4ebSTom Herbert } 481ab7ac4ebSTom Herbert 482ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 483ab7ac4ebSTom Herbert 484ab7ac4ebSTom Herbert return psock; 485ab7ac4ebSTom Herbert } 486ab7ac4ebSTom Herbert 487ab7ac4ebSTom Herbert /* mux lock held */ 488ab7ac4ebSTom Herbert static void psock_now_avail(struct kcm_psock *psock) 489ab7ac4ebSTom Herbert { 490ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 491ab7ac4ebSTom Herbert struct kcm_sock *kcm; 492ab7ac4ebSTom Herbert 493ab7ac4ebSTom Herbert if (list_empty(&mux->kcm_tx_waiters)) { 494ab7ac4ebSTom Herbert list_add_tail(&psock->psock_avail_list, 495ab7ac4ebSTom Herbert &mux->psocks_avail); 496ab7ac4ebSTom Herbert } else { 497ab7ac4ebSTom Herbert kcm = list_first_entry(&mux->kcm_tx_waiters, 498ab7ac4ebSTom Herbert struct kcm_sock, 499ab7ac4ebSTom Herbert wait_psock_list); 500ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 501ab7ac4ebSTom Herbert kcm->tx_wait = false; 502ab7ac4ebSTom Herbert psock->tx_kcm = kcm; 503ab7ac4ebSTom Herbert 504ab7ac4ebSTom Herbert /* Commit before changing tx_psock since that is read in 505ab7ac4ebSTom Herbert * reserve_psock before queuing work. 506ab7ac4ebSTom Herbert */ 507ab7ac4ebSTom Herbert smp_mb(); 508ab7ac4ebSTom Herbert 509ab7ac4ebSTom Herbert kcm->tx_psock = psock; 510cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.reserved); 511ab7ac4ebSTom Herbert queue_work(kcm_wq, &kcm->tx_work); 512ab7ac4ebSTom Herbert } 513ab7ac4ebSTom Herbert } 514ab7ac4ebSTom Herbert 515ab7ac4ebSTom Herbert /* kcm sock is locked. */ 516ab7ac4ebSTom Herbert static void unreserve_psock(struct kcm_sock *kcm) 517ab7ac4ebSTom Herbert { 518ab7ac4ebSTom Herbert struct kcm_psock *psock; 519ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 520ab7ac4ebSTom Herbert 521ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 522ab7ac4ebSTom Herbert 523ab7ac4ebSTom Herbert psock = kcm->tx_psock; 524ab7ac4ebSTom Herbert 525ab7ac4ebSTom Herbert if (WARN_ON(!psock)) { 526ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 527ab7ac4ebSTom Herbert return; 528ab7ac4ebSTom Herbert } 529ab7ac4ebSTom Herbert 530ab7ac4ebSTom Herbert smp_rmb(); /* Read tx_psock before tx_wait */ 531ab7ac4ebSTom Herbert 532cd6e111bSTom Herbert kcm_update_tx_mux_stats(mux, psock); 533cd6e111bSTom Herbert 534ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 535ab7ac4ebSTom Herbert 536ab7ac4ebSTom Herbert kcm->tx_psock = NULL; 537ab7ac4ebSTom Herbert psock->tx_kcm = NULL; 538cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.unreserved); 539ab7ac4ebSTom Herbert 540ab7ac4ebSTom Herbert if (unlikely(psock->tx_stopped)) { 541ab7ac4ebSTom Herbert if (psock->done) { 542ab7ac4ebSTom Herbert /* Deferred free */ 543ab7ac4ebSTom Herbert list_del(&psock->psock_list); 544ab7ac4ebSTom Herbert mux->psocks_cnt--; 545ab7ac4ebSTom Herbert sock_put(psock->sk); 546ab7ac4ebSTom Herbert fput(psock->sk->sk_socket->file); 547ab7ac4ebSTom Herbert kmem_cache_free(kcm_psockp, psock); 548ab7ac4ebSTom Herbert } 549ab7ac4ebSTom Herbert 550ab7ac4ebSTom Herbert /* Don't put back on available list */ 551ab7ac4ebSTom Herbert 552ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 553ab7ac4ebSTom Herbert 554ab7ac4ebSTom Herbert return; 555ab7ac4ebSTom Herbert } 556ab7ac4ebSTom Herbert 557ab7ac4ebSTom Herbert psock_now_avail(psock); 558ab7ac4ebSTom Herbert 559ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 560ab7ac4ebSTom Herbert } 561ab7ac4ebSTom Herbert 562cd6e111bSTom Herbert static void kcm_report_tx_retry(struct kcm_sock *kcm) 563cd6e111bSTom Herbert { 564cd6e111bSTom Herbert struct kcm_mux *mux = kcm->mux; 565cd6e111bSTom Herbert 566cd6e111bSTom Herbert spin_lock_bh(&mux->lock); 567cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.tx_retries); 568cd6e111bSTom Herbert spin_unlock_bh(&mux->lock); 569cd6e111bSTom Herbert } 570cd6e111bSTom Herbert 571ab7ac4ebSTom Herbert /* Write any messages ready on the kcm socket. Called with kcm sock lock 572ab7ac4ebSTom Herbert * held. Return bytes actually sent or error. 573ab7ac4ebSTom Herbert */ 574ab7ac4ebSTom Herbert static int kcm_write_msgs(struct kcm_sock *kcm) 575ab7ac4ebSTom Herbert { 576ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 577ab7ac4ebSTom Herbert struct kcm_psock *psock; 578ab7ac4ebSTom Herbert struct sk_buff *skb, *head; 579ab7ac4ebSTom Herbert struct kcm_tx_msg *txm; 580ab7ac4ebSTom Herbert unsigned short fragidx, frag_offset; 581ab7ac4ebSTom Herbert unsigned int sent, total_sent = 0; 582ab7ac4ebSTom Herbert int ret = 0; 583ab7ac4ebSTom Herbert 584ab7ac4ebSTom Herbert kcm->tx_wait_more = false; 585ab7ac4ebSTom Herbert psock = kcm->tx_psock; 586ab7ac4ebSTom Herbert if (unlikely(psock && psock->tx_stopped)) { 587ab7ac4ebSTom Herbert /* A reserved psock was aborted asynchronously. Unreserve 588ab7ac4ebSTom Herbert * it and we'll retry the message. 589ab7ac4ebSTom Herbert */ 590ab7ac4ebSTom Herbert unreserve_psock(kcm); 591cd6e111bSTom Herbert kcm_report_tx_retry(kcm); 592ab7ac4ebSTom Herbert if (skb_queue_empty(&sk->sk_write_queue)) 593ab7ac4ebSTom Herbert return 0; 594ab7ac4ebSTom Herbert 595ab7ac4ebSTom Herbert kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; 596ab7ac4ebSTom Herbert 597ab7ac4ebSTom Herbert } else if (skb_queue_empty(&sk->sk_write_queue)) { 598ab7ac4ebSTom Herbert return 0; 599ab7ac4ebSTom Herbert } 600ab7ac4ebSTom Herbert 601ab7ac4ebSTom Herbert head = skb_peek(&sk->sk_write_queue); 602ab7ac4ebSTom Herbert txm = kcm_tx_msg(head); 603ab7ac4ebSTom Herbert 604ab7ac4ebSTom Herbert if (txm->sent) { 605ab7ac4ebSTom Herbert /* Send of first skbuff in queue already in progress */ 606ab7ac4ebSTom Herbert if (WARN_ON(!psock)) { 607ab7ac4ebSTom Herbert ret = -EINVAL; 608ab7ac4ebSTom Herbert goto out; 609ab7ac4ebSTom Herbert } 610ab7ac4ebSTom Herbert sent = txm->sent; 611ab7ac4ebSTom Herbert frag_offset = txm->frag_offset; 612ab7ac4ebSTom Herbert fragidx = txm->fragidx; 613ab7ac4ebSTom Herbert skb = txm->frag_skb; 614ab7ac4ebSTom Herbert 615ab7ac4ebSTom Herbert goto do_frag; 616ab7ac4ebSTom Herbert } 617ab7ac4ebSTom Herbert 618ab7ac4ebSTom Herbert try_again: 619ab7ac4ebSTom Herbert psock = reserve_psock(kcm); 620ab7ac4ebSTom Herbert if (!psock) 621ab7ac4ebSTom Herbert goto out; 622ab7ac4ebSTom Herbert 623ab7ac4ebSTom Herbert do { 624ab7ac4ebSTom Herbert skb = head; 625ab7ac4ebSTom Herbert txm = kcm_tx_msg(head); 626ab7ac4ebSTom Herbert sent = 0; 627ab7ac4ebSTom Herbert 628ab7ac4ebSTom Herbert do_frag_list: 629ab7ac4ebSTom Herbert if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { 630ab7ac4ebSTom Herbert ret = -EINVAL; 631ab7ac4ebSTom Herbert goto out; 632ab7ac4ebSTom Herbert } 633ab7ac4ebSTom Herbert 634ab7ac4ebSTom Herbert for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; 635ab7ac4ebSTom Herbert fragidx++) { 636ab7ac4ebSTom Herbert skb_frag_t *frag; 637ab7ac4ebSTom Herbert 638ab7ac4ebSTom Herbert frag_offset = 0; 639ab7ac4ebSTom Herbert do_frag: 640ab7ac4ebSTom Herbert frag = &skb_shinfo(skb)->frags[fragidx]; 641ab7ac4ebSTom Herbert if (WARN_ON(!frag->size)) { 642ab7ac4ebSTom Herbert ret = -EINVAL; 643ab7ac4ebSTom Herbert goto out; 644ab7ac4ebSTom Herbert } 645ab7ac4ebSTom Herbert 646ab7ac4ebSTom Herbert ret = kernel_sendpage(psock->sk->sk_socket, 647ab7ac4ebSTom Herbert frag->page.p, 648ab7ac4ebSTom Herbert frag->page_offset + frag_offset, 649ab7ac4ebSTom Herbert frag->size - frag_offset, 650ab7ac4ebSTom Herbert MSG_DONTWAIT); 651ab7ac4ebSTom Herbert if (ret <= 0) { 652ab7ac4ebSTom Herbert if (ret == -EAGAIN) { 653ab7ac4ebSTom Herbert /* Save state to try again when there's 654ab7ac4ebSTom Herbert * write space on the socket 655ab7ac4ebSTom Herbert */ 656ab7ac4ebSTom Herbert txm->sent = sent; 657ab7ac4ebSTom Herbert txm->frag_offset = frag_offset; 658ab7ac4ebSTom Herbert txm->fragidx = fragidx; 659ab7ac4ebSTom Herbert txm->frag_skb = skb; 660ab7ac4ebSTom Herbert 661ab7ac4ebSTom Herbert ret = 0; 662ab7ac4ebSTom Herbert goto out; 663ab7ac4ebSTom Herbert } 664ab7ac4ebSTom Herbert 665ab7ac4ebSTom Herbert /* Hard failure in sending message, abort this 666ab7ac4ebSTom Herbert * psock since it has lost framing 667ab7ac4ebSTom Herbert * synchonization and retry sending the 668ab7ac4ebSTom Herbert * message from the beginning. 669ab7ac4ebSTom Herbert */ 670ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, 671ab7ac4ebSTom Herbert true); 672ab7ac4ebSTom Herbert unreserve_psock(kcm); 673ab7ac4ebSTom Herbert 674ab7ac4ebSTom Herbert txm->sent = 0; 675cd6e111bSTom Herbert kcm_report_tx_retry(kcm); 676ab7ac4ebSTom Herbert ret = 0; 677ab7ac4ebSTom Herbert 678ab7ac4ebSTom Herbert goto try_again; 679ab7ac4ebSTom Herbert } 680ab7ac4ebSTom Herbert 681ab7ac4ebSTom Herbert sent += ret; 682ab7ac4ebSTom Herbert frag_offset += ret; 683cd6e111bSTom Herbert KCM_STATS_ADD(psock->stats.tx_bytes, ret); 684ab7ac4ebSTom Herbert if (frag_offset < frag->size) { 685ab7ac4ebSTom Herbert /* Not finished with this frag */ 686ab7ac4ebSTom Herbert goto do_frag; 687ab7ac4ebSTom Herbert } 688ab7ac4ebSTom Herbert } 689ab7ac4ebSTom Herbert 690ab7ac4ebSTom Herbert if (skb == head) { 691ab7ac4ebSTom Herbert if (skb_has_frag_list(skb)) { 692ab7ac4ebSTom Herbert skb = skb_shinfo(skb)->frag_list; 693ab7ac4ebSTom Herbert goto do_frag_list; 694ab7ac4ebSTom Herbert } 695ab7ac4ebSTom Herbert } else if (skb->next) { 696ab7ac4ebSTom Herbert skb = skb->next; 697ab7ac4ebSTom Herbert goto do_frag_list; 698ab7ac4ebSTom Herbert } 699ab7ac4ebSTom Herbert 700ab7ac4ebSTom Herbert /* Successfully sent the whole packet, account for it. */ 701ab7ac4ebSTom Herbert skb_dequeue(&sk->sk_write_queue); 702ab7ac4ebSTom Herbert kfree_skb(head); 703ab7ac4ebSTom Herbert sk->sk_wmem_queued -= sent; 704ab7ac4ebSTom Herbert total_sent += sent; 705cd6e111bSTom Herbert KCM_STATS_INCR(psock->stats.tx_msgs); 706ab7ac4ebSTom Herbert } while ((head = skb_peek(&sk->sk_write_queue))); 707ab7ac4ebSTom Herbert out: 708ab7ac4ebSTom Herbert if (!head) { 709ab7ac4ebSTom Herbert /* Done with all queued messages. */ 710ab7ac4ebSTom Herbert WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); 711ab7ac4ebSTom Herbert unreserve_psock(kcm); 712ab7ac4ebSTom Herbert } 713ab7ac4ebSTom Herbert 714ab7ac4ebSTom Herbert /* Check if write space is available */ 715ab7ac4ebSTom Herbert sk->sk_write_space(sk); 716ab7ac4ebSTom Herbert 717ab7ac4ebSTom Herbert return total_sent ? : ret; 718ab7ac4ebSTom Herbert } 719ab7ac4ebSTom Herbert 720ab7ac4ebSTom Herbert static void kcm_tx_work(struct work_struct *w) 721ab7ac4ebSTom Herbert { 722ab7ac4ebSTom Herbert struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); 723ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 724ab7ac4ebSTom Herbert int err; 725ab7ac4ebSTom Herbert 726ab7ac4ebSTom Herbert lock_sock(sk); 727ab7ac4ebSTom Herbert 728ab7ac4ebSTom Herbert /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx 729ab7ac4ebSTom Herbert * aborts 730ab7ac4ebSTom Herbert */ 731ab7ac4ebSTom Herbert err = kcm_write_msgs(kcm); 732ab7ac4ebSTom Herbert if (err < 0) { 733ab7ac4ebSTom Herbert /* Hard failure in write, report error on KCM socket */ 734ab7ac4ebSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); 735ab7ac4ebSTom Herbert report_csk_error(&kcm->sk, -err); 736ab7ac4ebSTom Herbert goto out; 737ab7ac4ebSTom Herbert } 738ab7ac4ebSTom Herbert 739ab7ac4ebSTom Herbert /* Primarily for SOCK_SEQPACKET sockets */ 740ab7ac4ebSTom Herbert if (likely(sk->sk_socket) && 741ab7ac4ebSTom Herbert test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 742ab7ac4ebSTom Herbert clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 743ab7ac4ebSTom Herbert sk->sk_write_space(sk); 744ab7ac4ebSTom Herbert } 745ab7ac4ebSTom Herbert 746ab7ac4ebSTom Herbert out: 747ab7ac4ebSTom Herbert release_sock(sk); 748ab7ac4ebSTom Herbert } 749ab7ac4ebSTom Herbert 750ab7ac4ebSTom Herbert static void kcm_push(struct kcm_sock *kcm) 751ab7ac4ebSTom Herbert { 752ab7ac4ebSTom Herbert if (kcm->tx_wait_more) 753ab7ac4ebSTom Herbert kcm_write_msgs(kcm); 754ab7ac4ebSTom Herbert } 755ab7ac4ebSTom Herbert 756f29698fcSTom Herbert static ssize_t kcm_sendpage(struct socket *sock, struct page *page, 757f29698fcSTom Herbert int offset, size_t size, int flags) 758f29698fcSTom Herbert 759f29698fcSTom Herbert { 760f29698fcSTom Herbert struct sock *sk = sock->sk; 761f29698fcSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 762f29698fcSTom Herbert struct sk_buff *skb = NULL, *head = NULL; 763f29698fcSTom Herbert long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 764f29698fcSTom Herbert bool eor; 765f29698fcSTom Herbert int err = 0; 766f29698fcSTom Herbert int i; 767f29698fcSTom Herbert 768f29698fcSTom Herbert if (flags & MSG_SENDPAGE_NOTLAST) 769f29698fcSTom Herbert flags |= MSG_MORE; 770f29698fcSTom Herbert 771f29698fcSTom Herbert /* No MSG_EOR from splice, only look at MSG_MORE */ 772f29698fcSTom Herbert eor = !(flags & MSG_MORE); 773f29698fcSTom Herbert 774f29698fcSTom Herbert lock_sock(sk); 775f29698fcSTom Herbert 776f29698fcSTom Herbert sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 777f29698fcSTom Herbert 778f29698fcSTom Herbert err = -EPIPE; 779f29698fcSTom Herbert if (sk->sk_err) 780f29698fcSTom Herbert goto out_error; 781f29698fcSTom Herbert 782f29698fcSTom Herbert if (kcm->seq_skb) { 783f29698fcSTom Herbert /* Previously opened message */ 784f29698fcSTom Herbert head = kcm->seq_skb; 785f29698fcSTom Herbert skb = kcm_tx_msg(head)->last_skb; 786f29698fcSTom Herbert i = skb_shinfo(skb)->nr_frags; 787f29698fcSTom Herbert 788f29698fcSTom Herbert if (skb_can_coalesce(skb, i, page, offset)) { 789f29698fcSTom Herbert skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); 790f29698fcSTom Herbert skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 791f29698fcSTom Herbert goto coalesced; 792f29698fcSTom Herbert } 793f29698fcSTom Herbert 794f29698fcSTom Herbert if (i >= MAX_SKB_FRAGS) { 795f29698fcSTom Herbert struct sk_buff *tskb; 796f29698fcSTom Herbert 797f29698fcSTom Herbert tskb = alloc_skb(0, sk->sk_allocation); 798f29698fcSTom Herbert while (!tskb) { 799f29698fcSTom Herbert kcm_push(kcm); 800f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 801f29698fcSTom Herbert if (err) 802f29698fcSTom Herbert goto out_error; 803f29698fcSTom Herbert } 804f29698fcSTom Herbert 805f29698fcSTom Herbert if (head == skb) 806f29698fcSTom Herbert skb_shinfo(head)->frag_list = tskb; 807f29698fcSTom Herbert else 808f29698fcSTom Herbert skb->next = tskb; 809f29698fcSTom Herbert 810f29698fcSTom Herbert skb = tskb; 811f29698fcSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 812f29698fcSTom Herbert i = 0; 813f29698fcSTom Herbert } 814f29698fcSTom Herbert } else { 815f29698fcSTom Herbert /* Call the sk_stream functions to manage the sndbuf mem. */ 816f29698fcSTom Herbert if (!sk_stream_memory_free(sk)) { 817f29698fcSTom Herbert kcm_push(kcm); 818f29698fcSTom Herbert set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 819f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 820f29698fcSTom Herbert if (err) 821f29698fcSTom Herbert goto out_error; 822f29698fcSTom Herbert } 823f29698fcSTom Herbert 824f29698fcSTom Herbert head = alloc_skb(0, sk->sk_allocation); 825f29698fcSTom Herbert while (!head) { 826f29698fcSTom Herbert kcm_push(kcm); 827f29698fcSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 828f29698fcSTom Herbert if (err) 829f29698fcSTom Herbert goto out_error; 830f29698fcSTom Herbert } 831f29698fcSTom Herbert 832f29698fcSTom Herbert skb = head; 833f29698fcSTom Herbert i = 0; 834f29698fcSTom Herbert } 835f29698fcSTom Herbert 836f29698fcSTom Herbert get_page(page); 837f29698fcSTom Herbert skb_fill_page_desc(skb, i, page, offset, size); 838f29698fcSTom Herbert skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 839f29698fcSTom Herbert 840f29698fcSTom Herbert coalesced: 841f29698fcSTom Herbert skb->len += size; 842f29698fcSTom Herbert skb->data_len += size; 843f29698fcSTom Herbert skb->truesize += size; 844f29698fcSTom Herbert sk->sk_wmem_queued += size; 845f29698fcSTom Herbert sk_mem_charge(sk, size); 846f29698fcSTom Herbert 847f29698fcSTom Herbert if (head != skb) { 848f29698fcSTom Herbert head->len += size; 849f29698fcSTom Herbert head->data_len += size; 850f29698fcSTom Herbert head->truesize += size; 851f29698fcSTom Herbert } 852f29698fcSTom Herbert 853f29698fcSTom Herbert if (eor) { 854f29698fcSTom Herbert bool not_busy = skb_queue_empty(&sk->sk_write_queue); 855f29698fcSTom Herbert 856f29698fcSTom Herbert /* Message complete, queue it on send buffer */ 857f29698fcSTom Herbert __skb_queue_tail(&sk->sk_write_queue, head); 858f29698fcSTom Herbert kcm->seq_skb = NULL; 859f29698fcSTom Herbert KCM_STATS_INCR(kcm->stats.tx_msgs); 860f29698fcSTom Herbert 861f29698fcSTom Herbert if (flags & MSG_BATCH) { 862f29698fcSTom Herbert kcm->tx_wait_more = true; 863f29698fcSTom Herbert } else if (kcm->tx_wait_more || not_busy) { 864f29698fcSTom Herbert err = kcm_write_msgs(kcm); 865f29698fcSTom Herbert if (err < 0) { 866f29698fcSTom Herbert /* We got a hard error in write_msgs but have 867f29698fcSTom Herbert * already queued this message. Report an error 868f29698fcSTom Herbert * in the socket, but don't affect return value 869f29698fcSTom Herbert * from sendmsg 870f29698fcSTom Herbert */ 871f29698fcSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs\n"); 872f29698fcSTom Herbert report_csk_error(&kcm->sk, -err); 873f29698fcSTom Herbert } 874f29698fcSTom Herbert } 875f29698fcSTom Herbert } else { 876f29698fcSTom Herbert /* Message not complete, save state */ 877f29698fcSTom Herbert kcm->seq_skb = head; 878f29698fcSTom Herbert kcm_tx_msg(head)->last_skb = skb; 879f29698fcSTom Herbert } 880f29698fcSTom Herbert 881f29698fcSTom Herbert KCM_STATS_ADD(kcm->stats.tx_bytes, size); 882f29698fcSTom Herbert 883f29698fcSTom Herbert release_sock(sk); 884f29698fcSTom Herbert return size; 885f29698fcSTom Herbert 886f29698fcSTom Herbert out_error: 887f29698fcSTom Herbert kcm_push(kcm); 888f29698fcSTom Herbert 889f29698fcSTom Herbert err = sk_stream_error(sk, flags, err); 890f29698fcSTom Herbert 891f29698fcSTom Herbert /* make sure we wake any epoll edge trigger waiter */ 892f29698fcSTom Herbert if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 893f29698fcSTom Herbert sk->sk_write_space(sk); 894f29698fcSTom Herbert 895f29698fcSTom Herbert release_sock(sk); 896f29698fcSTom Herbert return err; 897f29698fcSTom Herbert } 898f29698fcSTom Herbert 899ab7ac4ebSTom Herbert static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 900ab7ac4ebSTom Herbert { 901ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 902ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 903ab7ac4ebSTom Herbert struct sk_buff *skb = NULL, *head = NULL; 904ab7ac4ebSTom Herbert size_t copy, copied = 0; 905ab7ac4ebSTom Herbert long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 906ab7ac4ebSTom Herbert int eor = (sock->type == SOCK_DGRAM) ? 907ab7ac4ebSTom Herbert !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); 908ab7ac4ebSTom Herbert int err = -EPIPE; 909ab7ac4ebSTom Herbert 910ab7ac4ebSTom Herbert lock_sock(sk); 911ab7ac4ebSTom Herbert 912ab7ac4ebSTom Herbert /* Per tcp_sendmsg this should be in poll */ 913ab7ac4ebSTom Herbert sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 914ab7ac4ebSTom Herbert 915ab7ac4ebSTom Herbert if (sk->sk_err) 916ab7ac4ebSTom Herbert goto out_error; 917ab7ac4ebSTom Herbert 918ab7ac4ebSTom Herbert if (kcm->seq_skb) { 919ab7ac4ebSTom Herbert /* Previously opened message */ 920ab7ac4ebSTom Herbert head = kcm->seq_skb; 921ab7ac4ebSTom Herbert skb = kcm_tx_msg(head)->last_skb; 922ab7ac4ebSTom Herbert goto start; 923ab7ac4ebSTom Herbert } 924ab7ac4ebSTom Herbert 925ab7ac4ebSTom Herbert /* Call the sk_stream functions to manage the sndbuf mem. */ 926ab7ac4ebSTom Herbert if (!sk_stream_memory_free(sk)) { 927ab7ac4ebSTom Herbert kcm_push(kcm); 928ab7ac4ebSTom Herbert set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 929ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 930ab7ac4ebSTom Herbert if (err) 931ab7ac4ebSTom Herbert goto out_error; 932ab7ac4ebSTom Herbert } 933ab7ac4ebSTom Herbert 93498e3862cSWANG Cong if (msg_data_left(msg)) { 935ab7ac4ebSTom Herbert /* New message, alloc head skb */ 936ab7ac4ebSTom Herbert head = alloc_skb(0, sk->sk_allocation); 937ab7ac4ebSTom Herbert while (!head) { 938ab7ac4ebSTom Herbert kcm_push(kcm); 939ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 940ab7ac4ebSTom Herbert if (err) 941ab7ac4ebSTom Herbert goto out_error; 942ab7ac4ebSTom Herbert 943ab7ac4ebSTom Herbert head = alloc_skb(0, sk->sk_allocation); 944ab7ac4ebSTom Herbert } 945ab7ac4ebSTom Herbert 946ab7ac4ebSTom Herbert skb = head; 947ab7ac4ebSTom Herbert 948ab7ac4ebSTom Herbert /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling 949ab7ac4ebSTom Herbert * csum_and_copy_from_iter from skb_do_copy_data_nocache. 950ab7ac4ebSTom Herbert */ 951ab7ac4ebSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 95298e3862cSWANG Cong } 953ab7ac4ebSTom Herbert 954ab7ac4ebSTom Herbert start: 955ab7ac4ebSTom Herbert while (msg_data_left(msg)) { 956ab7ac4ebSTom Herbert bool merge = true; 957ab7ac4ebSTom Herbert int i = skb_shinfo(skb)->nr_frags; 958ab7ac4ebSTom Herbert struct page_frag *pfrag = sk_page_frag(sk); 959ab7ac4ebSTom Herbert 960ab7ac4ebSTom Herbert if (!sk_page_frag_refill(sk, pfrag)) 961ab7ac4ebSTom Herbert goto wait_for_memory; 962ab7ac4ebSTom Herbert 963ab7ac4ebSTom Herbert if (!skb_can_coalesce(skb, i, pfrag->page, 964ab7ac4ebSTom Herbert pfrag->offset)) { 965ab7ac4ebSTom Herbert if (i == MAX_SKB_FRAGS) { 966ab7ac4ebSTom Herbert struct sk_buff *tskb; 967ab7ac4ebSTom Herbert 968ab7ac4ebSTom Herbert tskb = alloc_skb(0, sk->sk_allocation); 969ab7ac4ebSTom Herbert if (!tskb) 970ab7ac4ebSTom Herbert goto wait_for_memory; 971ab7ac4ebSTom Herbert 972ab7ac4ebSTom Herbert if (head == skb) 973ab7ac4ebSTom Herbert skb_shinfo(head)->frag_list = tskb; 974ab7ac4ebSTom Herbert else 975ab7ac4ebSTom Herbert skb->next = tskb; 976ab7ac4ebSTom Herbert 977ab7ac4ebSTom Herbert skb = tskb; 978ab7ac4ebSTom Herbert skb->ip_summed = CHECKSUM_UNNECESSARY; 979ab7ac4ebSTom Herbert continue; 980ab7ac4ebSTom Herbert } 981ab7ac4ebSTom Herbert merge = false; 982ab7ac4ebSTom Herbert } 983ab7ac4ebSTom Herbert 984ab7ac4ebSTom Herbert copy = min_t(int, msg_data_left(msg), 985ab7ac4ebSTom Herbert pfrag->size - pfrag->offset); 986ab7ac4ebSTom Herbert 987ab7ac4ebSTom Herbert if (!sk_wmem_schedule(sk, copy)) 988ab7ac4ebSTom Herbert goto wait_for_memory; 989ab7ac4ebSTom Herbert 990ab7ac4ebSTom Herbert err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, 991ab7ac4ebSTom Herbert pfrag->page, 992ab7ac4ebSTom Herbert pfrag->offset, 993ab7ac4ebSTom Herbert copy); 994ab7ac4ebSTom Herbert if (err) 995ab7ac4ebSTom Herbert goto out_error; 996ab7ac4ebSTom Herbert 997ab7ac4ebSTom Herbert /* Update the skb. */ 998ab7ac4ebSTom Herbert if (merge) { 999ab7ac4ebSTom Herbert skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1000ab7ac4ebSTom Herbert } else { 1001ab7ac4ebSTom Herbert skb_fill_page_desc(skb, i, pfrag->page, 1002ab7ac4ebSTom Herbert pfrag->offset, copy); 1003ab7ac4ebSTom Herbert get_page(pfrag->page); 1004ab7ac4ebSTom Herbert } 1005ab7ac4ebSTom Herbert 1006ab7ac4ebSTom Herbert pfrag->offset += copy; 1007ab7ac4ebSTom Herbert copied += copy; 1008ab7ac4ebSTom Herbert if (head != skb) { 1009ab7ac4ebSTom Herbert head->len += copy; 1010ab7ac4ebSTom Herbert head->data_len += copy; 1011ab7ac4ebSTom Herbert } 1012ab7ac4ebSTom Herbert 1013ab7ac4ebSTom Herbert continue; 1014ab7ac4ebSTom Herbert 1015ab7ac4ebSTom Herbert wait_for_memory: 1016ab7ac4ebSTom Herbert kcm_push(kcm); 1017ab7ac4ebSTom Herbert err = sk_stream_wait_memory(sk, &timeo); 1018ab7ac4ebSTom Herbert if (err) 1019ab7ac4ebSTom Herbert goto out_error; 1020ab7ac4ebSTom Herbert } 1021ab7ac4ebSTom Herbert 1022ab7ac4ebSTom Herbert if (eor) { 1023ab7ac4ebSTom Herbert bool not_busy = skb_queue_empty(&sk->sk_write_queue); 1024ab7ac4ebSTom Herbert 102598e3862cSWANG Cong if (head) { 1026ab7ac4ebSTom Herbert /* Message complete, queue it on send buffer */ 1027ab7ac4ebSTom Herbert __skb_queue_tail(&sk->sk_write_queue, head); 1028ab7ac4ebSTom Herbert kcm->seq_skb = NULL; 1029cd6e111bSTom Herbert KCM_STATS_INCR(kcm->stats.tx_msgs); 103098e3862cSWANG Cong } 1031ab7ac4ebSTom Herbert 1032ab7ac4ebSTom Herbert if (msg->msg_flags & MSG_BATCH) { 1033ab7ac4ebSTom Herbert kcm->tx_wait_more = true; 1034ab7ac4ebSTom Herbert } else if (kcm->tx_wait_more || not_busy) { 1035ab7ac4ebSTom Herbert err = kcm_write_msgs(kcm); 1036ab7ac4ebSTom Herbert if (err < 0) { 1037ab7ac4ebSTom Herbert /* We got a hard error in write_msgs but have 1038ab7ac4ebSTom Herbert * already queued this message. Report an error 1039ab7ac4ebSTom Herbert * in the socket, but don't affect return value 1040ab7ac4ebSTom Herbert * from sendmsg 1041ab7ac4ebSTom Herbert */ 1042ab7ac4ebSTom Herbert pr_warn("KCM: Hard failure on kcm_write_msgs\n"); 1043ab7ac4ebSTom Herbert report_csk_error(&kcm->sk, -err); 1044ab7ac4ebSTom Herbert } 1045ab7ac4ebSTom Herbert } 1046ab7ac4ebSTom Herbert } else { 1047ab7ac4ebSTom Herbert /* Message not complete, save state */ 1048ab7ac4ebSTom Herbert partial_message: 1049cd27b96bSWANG Cong if (head) { 1050ab7ac4ebSTom Herbert kcm->seq_skb = head; 1051ab7ac4ebSTom Herbert kcm_tx_msg(head)->last_skb = skb; 1052ab7ac4ebSTom Herbert } 1053cd27b96bSWANG Cong } 1054ab7ac4ebSTom Herbert 1055cd6e111bSTom Herbert KCM_STATS_ADD(kcm->stats.tx_bytes, copied); 1056cd6e111bSTom Herbert 1057ab7ac4ebSTom Herbert release_sock(sk); 1058ab7ac4ebSTom Herbert return copied; 1059ab7ac4ebSTom Herbert 1060ab7ac4ebSTom Herbert out_error: 1061ab7ac4ebSTom Herbert kcm_push(kcm); 1062ab7ac4ebSTom Herbert 1063ab7ac4ebSTom Herbert if (copied && sock->type == SOCK_SEQPACKET) { 1064ab7ac4ebSTom Herbert /* Wrote some bytes before encountering an 1065ab7ac4ebSTom Herbert * error, return partial success. 1066ab7ac4ebSTom Herbert */ 1067ab7ac4ebSTom Herbert goto partial_message; 1068ab7ac4ebSTom Herbert } 1069ab7ac4ebSTom Herbert 1070ab7ac4ebSTom Herbert if (head != kcm->seq_skb) 1071ab7ac4ebSTom Herbert kfree_skb(head); 1072ab7ac4ebSTom Herbert 1073ab7ac4ebSTom Herbert err = sk_stream_error(sk, msg->msg_flags, err); 1074ab7ac4ebSTom Herbert 1075ab7ac4ebSTom Herbert /* make sure we wake any epoll edge trigger waiter */ 1076ab7ac4ebSTom Herbert if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 1077ab7ac4ebSTom Herbert sk->sk_write_space(sk); 1078ab7ac4ebSTom Herbert 1079ab7ac4ebSTom Herbert release_sock(sk); 1080ab7ac4ebSTom Herbert return err; 1081ab7ac4ebSTom Herbert } 1082ab7ac4ebSTom Herbert 1083ab7ac4ebSTom Herbert static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, 1084ab7ac4ebSTom Herbert long timeo, int *err) 1085ab7ac4ebSTom Herbert { 1086ab7ac4ebSTom Herbert struct sk_buff *skb; 1087ab7ac4ebSTom Herbert 1088ab7ac4ebSTom Herbert while (!(skb = skb_peek(&sk->sk_receive_queue))) { 1089ab7ac4ebSTom Herbert if (sk->sk_err) { 1090ab7ac4ebSTom Herbert *err = sock_error(sk); 1091ab7ac4ebSTom Herbert return NULL; 1092ab7ac4ebSTom Herbert } 1093ab7ac4ebSTom Herbert 1094ab7ac4ebSTom Herbert if (sock_flag(sk, SOCK_DONE)) 1095ab7ac4ebSTom Herbert return NULL; 1096ab7ac4ebSTom Herbert 1097ab7ac4ebSTom Herbert if ((flags & MSG_DONTWAIT) || !timeo) { 1098ab7ac4ebSTom Herbert *err = -EAGAIN; 1099ab7ac4ebSTom Herbert return NULL; 1100ab7ac4ebSTom Herbert } 1101ab7ac4ebSTom Herbert 1102ab7ac4ebSTom Herbert sk_wait_data(sk, &timeo, NULL); 1103ab7ac4ebSTom Herbert 1104ab7ac4ebSTom Herbert /* Handle signals */ 1105ab7ac4ebSTom Herbert if (signal_pending(current)) { 1106ab7ac4ebSTom Herbert *err = sock_intr_errno(timeo); 1107ab7ac4ebSTom Herbert return NULL; 1108ab7ac4ebSTom Herbert } 1109ab7ac4ebSTom Herbert } 1110ab7ac4ebSTom Herbert 1111ab7ac4ebSTom Herbert return skb; 1112ab7ac4ebSTom Herbert } 1113ab7ac4ebSTom Herbert 1114ab7ac4ebSTom Herbert static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, 1115ab7ac4ebSTom Herbert size_t len, int flags) 1116ab7ac4ebSTom Herbert { 1117ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 1118cd6e111bSTom Herbert struct kcm_sock *kcm = kcm_sk(sk); 1119ab7ac4ebSTom Herbert int err = 0; 1120ab7ac4ebSTom Herbert long timeo; 11219b73896aSTom Herbert struct strp_rx_msg *rxm; 1122ab7ac4ebSTom Herbert int copied = 0; 1123ab7ac4ebSTom Herbert struct sk_buff *skb; 1124ab7ac4ebSTom Herbert 1125ab7ac4ebSTom Herbert timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1126ab7ac4ebSTom Herbert 1127ab7ac4ebSTom Herbert lock_sock(sk); 1128ab7ac4ebSTom Herbert 1129ab7ac4ebSTom Herbert skb = kcm_wait_data(sk, flags, timeo, &err); 1130ab7ac4ebSTom Herbert if (!skb) 1131ab7ac4ebSTom Herbert goto out; 1132ab7ac4ebSTom Herbert 1133ab7ac4ebSTom Herbert /* Okay, have a message on the receive queue */ 1134ab7ac4ebSTom Herbert 11359b73896aSTom Herbert rxm = strp_rx_msg(skb); 1136ab7ac4ebSTom Herbert 1137ab7ac4ebSTom Herbert if (len > rxm->full_len) 1138ab7ac4ebSTom Herbert len = rxm->full_len; 1139ab7ac4ebSTom Herbert 1140ab7ac4ebSTom Herbert err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); 1141ab7ac4ebSTom Herbert if (err < 0) 1142ab7ac4ebSTom Herbert goto out; 1143ab7ac4ebSTom Herbert 1144ab7ac4ebSTom Herbert copied = len; 1145ab7ac4ebSTom Herbert if (likely(!(flags & MSG_PEEK))) { 1146cd6e111bSTom Herbert KCM_STATS_ADD(kcm->stats.rx_bytes, copied); 1147ab7ac4ebSTom Herbert if (copied < rxm->full_len) { 1148ab7ac4ebSTom Herbert if (sock->type == SOCK_DGRAM) { 1149ab7ac4ebSTom Herbert /* Truncated message */ 1150ab7ac4ebSTom Herbert msg->msg_flags |= MSG_TRUNC; 1151ab7ac4ebSTom Herbert goto msg_finished; 1152ab7ac4ebSTom Herbert } 1153ab7ac4ebSTom Herbert rxm->offset += copied; 1154ab7ac4ebSTom Herbert rxm->full_len -= copied; 1155ab7ac4ebSTom Herbert } else { 1156ab7ac4ebSTom Herbert msg_finished: 1157ab7ac4ebSTom Herbert /* Finished with message */ 1158ab7ac4ebSTom Herbert msg->msg_flags |= MSG_EOR; 1159cd6e111bSTom Herbert KCM_STATS_INCR(kcm->stats.rx_msgs); 1160ab7ac4ebSTom Herbert skb_unlink(skb, &sk->sk_receive_queue); 1161ab7ac4ebSTom Herbert kfree_skb(skb); 1162ab7ac4ebSTom Herbert } 1163ab7ac4ebSTom Herbert } 1164ab7ac4ebSTom Herbert 1165ab7ac4ebSTom Herbert out: 1166ab7ac4ebSTom Herbert release_sock(sk); 1167ab7ac4ebSTom Herbert 1168ab7ac4ebSTom Herbert return copied ? : err; 1169ab7ac4ebSTom Herbert } 1170ab7ac4ebSTom Herbert 117191687355STom Herbert static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, 117291687355STom Herbert struct pipe_inode_info *pipe, size_t len, 117391687355STom Herbert unsigned int flags) 117491687355STom Herbert { 117591687355STom Herbert struct sock *sk = sock->sk; 117691687355STom Herbert struct kcm_sock *kcm = kcm_sk(sk); 117791687355STom Herbert long timeo; 11789b73896aSTom Herbert struct strp_rx_msg *rxm; 117991687355STom Herbert int err = 0; 1180f1971a2eSWANG Cong ssize_t copied; 118191687355STom Herbert struct sk_buff *skb; 118291687355STom Herbert 118391687355STom Herbert /* Only support splice for SOCKSEQPACKET */ 118491687355STom Herbert 118591687355STom Herbert timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 118691687355STom Herbert 118791687355STom Herbert lock_sock(sk); 118891687355STom Herbert 118991687355STom Herbert skb = kcm_wait_data(sk, flags, timeo, &err); 119091687355STom Herbert if (!skb) 119191687355STom Herbert goto err_out; 119291687355STom Herbert 119391687355STom Herbert /* Okay, have a message on the receive queue */ 119491687355STom Herbert 11959b73896aSTom Herbert rxm = strp_rx_msg(skb); 119691687355STom Herbert 119791687355STom Herbert if (len > rxm->full_len) 119891687355STom Herbert len = rxm->full_len; 119991687355STom Herbert 120025869262SAl Viro copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags); 120191687355STom Herbert if (copied < 0) { 120291687355STom Herbert err = copied; 120391687355STom Herbert goto err_out; 120491687355STom Herbert } 120591687355STom Herbert 120691687355STom Herbert KCM_STATS_ADD(kcm->stats.rx_bytes, copied); 120791687355STom Herbert 120891687355STom Herbert rxm->offset += copied; 120991687355STom Herbert rxm->full_len -= copied; 121091687355STom Herbert 121191687355STom Herbert /* We have no way to return MSG_EOR. If all the bytes have been 121291687355STom Herbert * read we still leave the message in the receive socket buffer. 121391687355STom Herbert * A subsequent recvmsg needs to be done to return MSG_EOR and 121491687355STom Herbert * finish reading the message. 121591687355STom Herbert */ 121691687355STom Herbert 121791687355STom Herbert release_sock(sk); 121891687355STom Herbert 121991687355STom Herbert return copied; 122091687355STom Herbert 122191687355STom Herbert err_out: 122291687355STom Herbert release_sock(sk); 122391687355STom Herbert 122491687355STom Herbert return err; 122591687355STom Herbert } 122691687355STom Herbert 1227ab7ac4ebSTom Herbert /* kcm sock lock held */ 1228ab7ac4ebSTom Herbert static void kcm_recv_disable(struct kcm_sock *kcm) 1229ab7ac4ebSTom Herbert { 1230ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1231ab7ac4ebSTom Herbert 1232ab7ac4ebSTom Herbert if (kcm->rx_disabled) 1233ab7ac4ebSTom Herbert return; 1234ab7ac4ebSTom Herbert 1235ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1236ab7ac4ebSTom Herbert 1237ab7ac4ebSTom Herbert kcm->rx_disabled = 1; 1238ab7ac4ebSTom Herbert 1239ab7ac4ebSTom Herbert /* If a psock is reserved we'll do cleanup in unreserve */ 1240ab7ac4ebSTom Herbert if (!kcm->rx_psock) { 1241ab7ac4ebSTom Herbert if (kcm->rx_wait) { 1242ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 1243ab7ac4ebSTom Herbert kcm->rx_wait = false; 1244ab7ac4ebSTom Herbert } 1245ab7ac4ebSTom Herbert 1246ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); 1247ab7ac4ebSTom Herbert } 1248ab7ac4ebSTom Herbert 1249ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1250ab7ac4ebSTom Herbert } 1251ab7ac4ebSTom Herbert 1252ab7ac4ebSTom Herbert /* kcm sock lock held */ 1253ab7ac4ebSTom Herbert static void kcm_recv_enable(struct kcm_sock *kcm) 1254ab7ac4ebSTom Herbert { 1255ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1256ab7ac4ebSTom Herbert 1257ab7ac4ebSTom Herbert if (!kcm->rx_disabled) 1258ab7ac4ebSTom Herbert return; 1259ab7ac4ebSTom Herbert 1260ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1261ab7ac4ebSTom Herbert 1262ab7ac4ebSTom Herbert kcm->rx_disabled = 0; 1263ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 1264ab7ac4ebSTom Herbert 1265ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1266ab7ac4ebSTom Herbert } 1267ab7ac4ebSTom Herbert 1268ab7ac4ebSTom Herbert static int kcm_setsockopt(struct socket *sock, int level, int optname, 1269ab7ac4ebSTom Herbert char __user *optval, unsigned int optlen) 1270ab7ac4ebSTom Herbert { 1271ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1272ab7ac4ebSTom Herbert int val, valbool; 1273ab7ac4ebSTom Herbert int err = 0; 1274ab7ac4ebSTom Herbert 1275ab7ac4ebSTom Herbert if (level != SOL_KCM) 1276ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1277ab7ac4ebSTom Herbert 1278ab7ac4ebSTom Herbert if (optlen < sizeof(int)) 1279ab7ac4ebSTom Herbert return -EINVAL; 1280ab7ac4ebSTom Herbert 1281ab7ac4ebSTom Herbert if (get_user(val, (int __user *)optval)) 1282ab7ac4ebSTom Herbert return -EINVAL; 1283ab7ac4ebSTom Herbert 1284ab7ac4ebSTom Herbert valbool = val ? 1 : 0; 1285ab7ac4ebSTom Herbert 1286ab7ac4ebSTom Herbert switch (optname) { 1287ab7ac4ebSTom Herbert case KCM_RECV_DISABLE: 1288ab7ac4ebSTom Herbert lock_sock(&kcm->sk); 1289ab7ac4ebSTom Herbert if (valbool) 1290ab7ac4ebSTom Herbert kcm_recv_disable(kcm); 1291ab7ac4ebSTom Herbert else 1292ab7ac4ebSTom Herbert kcm_recv_enable(kcm); 1293ab7ac4ebSTom Herbert release_sock(&kcm->sk); 1294ab7ac4ebSTom Herbert break; 1295ab7ac4ebSTom Herbert default: 1296ab7ac4ebSTom Herbert err = -ENOPROTOOPT; 1297ab7ac4ebSTom Herbert } 1298ab7ac4ebSTom Herbert 1299ab7ac4ebSTom Herbert return err; 1300ab7ac4ebSTom Herbert } 1301ab7ac4ebSTom Herbert 1302ab7ac4ebSTom Herbert static int kcm_getsockopt(struct socket *sock, int level, int optname, 1303ab7ac4ebSTom Herbert char __user *optval, int __user *optlen) 1304ab7ac4ebSTom Herbert { 1305ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1306ab7ac4ebSTom Herbert int val, len; 1307ab7ac4ebSTom Herbert 1308ab7ac4ebSTom Herbert if (level != SOL_KCM) 1309ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1310ab7ac4ebSTom Herbert 1311ab7ac4ebSTom Herbert if (get_user(len, optlen)) 1312ab7ac4ebSTom Herbert return -EFAULT; 1313ab7ac4ebSTom Herbert 1314ab7ac4ebSTom Herbert len = min_t(unsigned int, len, sizeof(int)); 1315ab7ac4ebSTom Herbert if (len < 0) 1316ab7ac4ebSTom Herbert return -EINVAL; 1317ab7ac4ebSTom Herbert 1318ab7ac4ebSTom Herbert switch (optname) { 1319ab7ac4ebSTom Herbert case KCM_RECV_DISABLE: 1320ab7ac4ebSTom Herbert val = kcm->rx_disabled; 1321ab7ac4ebSTom Herbert break; 1322ab7ac4ebSTom Herbert default: 1323ab7ac4ebSTom Herbert return -ENOPROTOOPT; 1324ab7ac4ebSTom Herbert } 1325ab7ac4ebSTom Herbert 1326ab7ac4ebSTom Herbert if (put_user(len, optlen)) 1327ab7ac4ebSTom Herbert return -EFAULT; 1328ab7ac4ebSTom Herbert if (copy_to_user(optval, &val, len)) 1329ab7ac4ebSTom Herbert return -EFAULT; 1330ab7ac4ebSTom Herbert return 0; 1331ab7ac4ebSTom Herbert } 1332ab7ac4ebSTom Herbert 1333ab7ac4ebSTom Herbert static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) 1334ab7ac4ebSTom Herbert { 1335ab7ac4ebSTom Herbert struct kcm_sock *tkcm; 1336ab7ac4ebSTom Herbert struct list_head *head; 1337ab7ac4ebSTom Herbert int index = 0; 1338ab7ac4ebSTom Herbert 1339ab7ac4ebSTom Herbert /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so 1340ab7ac4ebSTom Herbert * we set sk_state, otherwise epoll_wait always returns right away with 1341ab7ac4ebSTom Herbert * POLLHUP 1342ab7ac4ebSTom Herbert */ 1343ab7ac4ebSTom Herbert kcm->sk.sk_state = TCP_ESTABLISHED; 1344ab7ac4ebSTom Herbert 1345ab7ac4ebSTom Herbert /* Add to mux's kcm sockets list */ 1346ab7ac4ebSTom Herbert kcm->mux = mux; 1347ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1348ab7ac4ebSTom Herbert 1349ab7ac4ebSTom Herbert head = &mux->kcm_socks; 1350ab7ac4ebSTom Herbert list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { 1351ab7ac4ebSTom Herbert if (tkcm->index != index) 1352ab7ac4ebSTom Herbert break; 1353ab7ac4ebSTom Herbert head = &tkcm->kcm_sock_list; 1354ab7ac4ebSTom Herbert index++; 1355ab7ac4ebSTom Herbert } 1356ab7ac4ebSTom Herbert 1357ab7ac4ebSTom Herbert list_add(&kcm->kcm_sock_list, head); 1358ab7ac4ebSTom Herbert kcm->index = index; 1359ab7ac4ebSTom Herbert 1360ab7ac4ebSTom Herbert mux->kcm_socks_cnt++; 1361ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1362ab7ac4ebSTom Herbert 1363ab7ac4ebSTom Herbert INIT_WORK(&kcm->tx_work, kcm_tx_work); 1364ab7ac4ebSTom Herbert 1365ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1366ab7ac4ebSTom Herbert kcm_rcv_ready(kcm); 1367ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1368ab7ac4ebSTom Herbert } 1369ab7ac4ebSTom Herbert 1370ab7ac4ebSTom Herbert static int kcm_attach(struct socket *sock, struct socket *csock, 1371ab7ac4ebSTom Herbert struct bpf_prog *prog) 1372ab7ac4ebSTom Herbert { 1373ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1374ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1375ab7ac4ebSTom Herbert struct sock *csk; 1376ab7ac4ebSTom Herbert struct kcm_psock *psock = NULL, *tpsock; 1377ab7ac4ebSTom Herbert struct list_head *head; 1378ab7ac4ebSTom Herbert int index = 0; 13799b73896aSTom Herbert struct strp_callbacks cb; 138096a59083STom Herbert int err; 1381ab7ac4ebSTom Herbert 1382ab7ac4ebSTom Herbert csk = csock->sk; 1383ab7ac4ebSTom Herbert if (!csk) 1384ab7ac4ebSTom Herbert return -EINVAL; 1385ab7ac4ebSTom Herbert 1386*351050ecSEric Dumazet /* We must prevent loops or risk deadlock ! */ 1387*351050ecSEric Dumazet if (csk->sk_family == PF_KCM) 1388*351050ecSEric Dumazet return -EOPNOTSUPP; 1389*351050ecSEric Dumazet 1390ab7ac4ebSTom Herbert psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); 1391ab7ac4ebSTom Herbert if (!psock) 1392ab7ac4ebSTom Herbert return -ENOMEM; 1393ab7ac4ebSTom Herbert 1394ab7ac4ebSTom Herbert psock->mux = mux; 1395ab7ac4ebSTom Herbert psock->sk = csk; 1396ab7ac4ebSTom Herbert psock->bpf_prog = prog; 139729152a34STom Herbert 13989b73896aSTom Herbert cb.rcv_msg = kcm_rcv_strparser; 13999b73896aSTom Herbert cb.abort_parser = NULL; 14009b73896aSTom Herbert cb.parse_msg = kcm_parse_func_strparser; 14019b73896aSTom Herbert cb.read_sock_done = kcm_read_sock_done; 140229152a34STom Herbert 140396a59083STom Herbert err = strp_init(&psock->strp, csk, &cb); 140496a59083STom Herbert if (err) { 140596a59083STom Herbert kmem_cache_free(kcm_psockp, psock); 140696a59083STom Herbert return err; 140796a59083STom Herbert } 1408ab7ac4ebSTom Herbert 1409ab7ac4ebSTom Herbert sock_hold(csk); 1410ab7ac4ebSTom Herbert 1411ab7ac4ebSTom Herbert write_lock_bh(&csk->sk_callback_lock); 1412ab7ac4ebSTom Herbert psock->save_data_ready = csk->sk_data_ready; 1413ab7ac4ebSTom Herbert psock->save_write_space = csk->sk_write_space; 1414ab7ac4ebSTom Herbert psock->save_state_change = csk->sk_state_change; 1415ab7ac4ebSTom Herbert csk->sk_user_data = psock; 141696a59083STom Herbert csk->sk_data_ready = psock_data_ready; 141796a59083STom Herbert csk->sk_write_space = psock_write_space; 141896a59083STom Herbert csk->sk_state_change = psock_state_change; 1419ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 1420ab7ac4ebSTom Herbert 1421ab7ac4ebSTom Herbert /* Finished initialization, now add the psock to the MUX. */ 1422ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1423ab7ac4ebSTom Herbert head = &mux->psocks; 1424ab7ac4ebSTom Herbert list_for_each_entry(tpsock, &mux->psocks, psock_list) { 1425ab7ac4ebSTom Herbert if (tpsock->index != index) 1426ab7ac4ebSTom Herbert break; 1427ab7ac4ebSTom Herbert head = &tpsock->psock_list; 1428ab7ac4ebSTom Herbert index++; 1429ab7ac4ebSTom Herbert } 1430ab7ac4ebSTom Herbert 1431ab7ac4ebSTom Herbert list_add(&psock->psock_list, head); 1432ab7ac4ebSTom Herbert psock->index = index; 1433ab7ac4ebSTom Herbert 1434cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_attach); 1435ab7ac4ebSTom Herbert mux->psocks_cnt++; 1436ab7ac4ebSTom Herbert psock_now_avail(psock); 1437ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1438ab7ac4ebSTom Herbert 1439ab7ac4ebSTom Herbert /* Schedule RX work in case there are already bytes queued */ 14409b73896aSTom Herbert strp_check_rcv(&psock->strp); 1441ab7ac4ebSTom Herbert 1442ab7ac4ebSTom Herbert return 0; 1443ab7ac4ebSTom Herbert } 1444ab7ac4ebSTom Herbert 1445ab7ac4ebSTom Herbert static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) 1446ab7ac4ebSTom Herbert { 1447ab7ac4ebSTom Herbert struct socket *csock; 1448ab7ac4ebSTom Herbert struct bpf_prog *prog; 1449ab7ac4ebSTom Herbert int err; 1450ab7ac4ebSTom Herbert 1451ab7ac4ebSTom Herbert csock = sockfd_lookup(info->fd, &err); 1452ab7ac4ebSTom Herbert if (!csock) 1453ab7ac4ebSTom Herbert return -ENOENT; 1454ab7ac4ebSTom Herbert 1455113214beSDaniel Borkmann prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER); 1456ab7ac4ebSTom Herbert if (IS_ERR(prog)) { 1457ab7ac4ebSTom Herbert err = PTR_ERR(prog); 1458ab7ac4ebSTom Herbert goto out; 1459ab7ac4ebSTom Herbert } 1460ab7ac4ebSTom Herbert 1461ab7ac4ebSTom Herbert err = kcm_attach(sock, csock, prog); 1462ab7ac4ebSTom Herbert if (err) { 1463ab7ac4ebSTom Herbert bpf_prog_put(prog); 1464ab7ac4ebSTom Herbert goto out; 1465ab7ac4ebSTom Herbert } 1466ab7ac4ebSTom Herbert 1467ab7ac4ebSTom Herbert /* Keep reference on file also */ 1468ab7ac4ebSTom Herbert 1469ab7ac4ebSTom Herbert return 0; 1470ab7ac4ebSTom Herbert out: 1471ab7ac4ebSTom Herbert fput(csock->file); 1472ab7ac4ebSTom Herbert return err; 1473ab7ac4ebSTom Herbert } 1474ab7ac4ebSTom Herbert 1475ab7ac4ebSTom Herbert static void kcm_unattach(struct kcm_psock *psock) 1476ab7ac4ebSTom Herbert { 1477ab7ac4ebSTom Herbert struct sock *csk = psock->sk; 1478ab7ac4ebSTom Herbert struct kcm_mux *mux = psock->mux; 1479ab7ac4ebSTom Herbert 14801616b38fSTom Herbert lock_sock(csk); 14811616b38fSTom Herbert 1482ab7ac4ebSTom Herbert /* Stop getting callbacks from TCP socket. After this there should 1483ab7ac4ebSTom Herbert * be no way to reserve a kcm for this psock. 1484ab7ac4ebSTom Herbert */ 1485ab7ac4ebSTom Herbert write_lock_bh(&csk->sk_callback_lock); 1486ab7ac4ebSTom Herbert csk->sk_user_data = NULL; 1487ab7ac4ebSTom Herbert csk->sk_data_ready = psock->save_data_ready; 1488ab7ac4ebSTom Herbert csk->sk_write_space = psock->save_write_space; 1489ab7ac4ebSTom Herbert csk->sk_state_change = psock->save_state_change; 14909b73896aSTom Herbert strp_stop(&psock->strp); 1491ab7ac4ebSTom Herbert 1492ab7ac4ebSTom Herbert if (WARN_ON(psock->rx_kcm)) { 1493ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 1494ab7ac4ebSTom Herbert return; 1495ab7ac4ebSTom Herbert } 1496ab7ac4ebSTom Herbert 1497ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1498ab7ac4ebSTom Herbert 1499ab7ac4ebSTom Herbert /* Stop receiver activities. After this point psock should not be 1500ab7ac4ebSTom Herbert * able to get onto ready list either through callbacks or work. 1501ab7ac4ebSTom Herbert */ 1502ab7ac4ebSTom Herbert if (psock->ready_rx_msg) { 1503ab7ac4ebSTom Herbert list_del(&psock->psock_ready_list); 1504ab7ac4ebSTom Herbert kfree_skb(psock->ready_rx_msg); 1505ab7ac4ebSTom Herbert psock->ready_rx_msg = NULL; 1506cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.rx_ready_drops); 1507ab7ac4ebSTom Herbert } 1508ab7ac4ebSTom Herbert 1509ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1510ab7ac4ebSTom Herbert 1511ab7ac4ebSTom Herbert write_unlock_bh(&csk->sk_callback_lock); 1512ab7ac4ebSTom Herbert 15131616b38fSTom Herbert /* Call strp_done without sock lock */ 15141616b38fSTom Herbert release_sock(csk); 15159b73896aSTom Herbert strp_done(&psock->strp); 15161616b38fSTom Herbert lock_sock(csk); 1517ab7ac4ebSTom Herbert 1518ab7ac4ebSTom Herbert bpf_prog_put(psock->bpf_prog); 1519ab7ac4ebSTom Herbert 1520ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1521ab7ac4ebSTom Herbert 1522cd6e111bSTom Herbert aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); 15239b73896aSTom Herbert save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); 1524cd6e111bSTom Herbert 1525cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_unattach); 1526cd6e111bSTom Herbert 1527ab7ac4ebSTom Herbert if (psock->tx_kcm) { 1528ab7ac4ebSTom Herbert /* psock was reserved. Just mark it finished and we will clean 1529ab7ac4ebSTom Herbert * up in the kcm paths, we need kcm lock which can not be 1530ab7ac4ebSTom Herbert * acquired here. 1531ab7ac4ebSTom Herbert */ 1532cd6e111bSTom Herbert KCM_STATS_INCR(mux->stats.psock_unattach_rsvd); 1533ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1534ab7ac4ebSTom Herbert 1535ab7ac4ebSTom Herbert /* We are unattaching a socket that is reserved. Abort the 1536ab7ac4ebSTom Herbert * socket since we may be out of sync in sending on it. We need 1537ab7ac4ebSTom Herbert * to do this without the mux lock. 1538ab7ac4ebSTom Herbert */ 1539ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, EPIPE, false); 1540ab7ac4ebSTom Herbert 1541ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1542ab7ac4ebSTom Herbert if (!psock->tx_kcm) { 1543ab7ac4ebSTom Herbert /* psock now unreserved in window mux was unlocked */ 1544ab7ac4ebSTom Herbert goto no_reserved; 1545ab7ac4ebSTom Herbert } 1546ab7ac4ebSTom Herbert psock->done = 1; 1547ab7ac4ebSTom Herbert 1548ab7ac4ebSTom Herbert /* Commit done before queuing work to process it */ 1549ab7ac4ebSTom Herbert smp_mb(); 1550ab7ac4ebSTom Herbert 1551ab7ac4ebSTom Herbert /* Queue tx work to make sure psock->done is handled */ 1552ab7ac4ebSTom Herbert queue_work(kcm_wq, &psock->tx_kcm->tx_work); 1553ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1554ab7ac4ebSTom Herbert } else { 1555ab7ac4ebSTom Herbert no_reserved: 1556ab7ac4ebSTom Herbert if (!psock->tx_stopped) 1557ab7ac4ebSTom Herbert list_del(&psock->psock_avail_list); 1558ab7ac4ebSTom Herbert list_del(&psock->psock_list); 1559ab7ac4ebSTom Herbert mux->psocks_cnt--; 1560ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1561ab7ac4ebSTom Herbert 1562ab7ac4ebSTom Herbert sock_put(csk); 1563ab7ac4ebSTom Herbert fput(csk->sk_socket->file); 1564ab7ac4ebSTom Herbert kmem_cache_free(kcm_psockp, psock); 1565ab7ac4ebSTom Herbert } 15661616b38fSTom Herbert 15671616b38fSTom Herbert release_sock(csk); 1568ab7ac4ebSTom Herbert } 1569ab7ac4ebSTom Herbert 1570ab7ac4ebSTom Herbert static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) 1571ab7ac4ebSTom Herbert { 1572ab7ac4ebSTom Herbert struct kcm_sock *kcm = kcm_sk(sock->sk); 1573ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1574ab7ac4ebSTom Herbert struct kcm_psock *psock; 1575ab7ac4ebSTom Herbert struct socket *csock; 1576ab7ac4ebSTom Herbert struct sock *csk; 1577ab7ac4ebSTom Herbert int err; 1578ab7ac4ebSTom Herbert 1579ab7ac4ebSTom Herbert csock = sockfd_lookup(info->fd, &err); 1580ab7ac4ebSTom Herbert if (!csock) 1581ab7ac4ebSTom Herbert return -ENOENT; 1582ab7ac4ebSTom Herbert 1583ab7ac4ebSTom Herbert csk = csock->sk; 1584ab7ac4ebSTom Herbert if (!csk) { 1585ab7ac4ebSTom Herbert err = -EINVAL; 1586ab7ac4ebSTom Herbert goto out; 1587ab7ac4ebSTom Herbert } 1588ab7ac4ebSTom Herbert 1589ab7ac4ebSTom Herbert err = -ENOENT; 1590ab7ac4ebSTom Herbert 1591ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1592ab7ac4ebSTom Herbert 1593ab7ac4ebSTom Herbert list_for_each_entry(psock, &mux->psocks, psock_list) { 1594ab7ac4ebSTom Herbert if (psock->sk != csk) 1595ab7ac4ebSTom Herbert continue; 1596ab7ac4ebSTom Herbert 1597ab7ac4ebSTom Herbert /* Found the matching psock */ 1598ab7ac4ebSTom Herbert 1599ab7ac4ebSTom Herbert if (psock->unattaching || WARN_ON(psock->done)) { 1600ab7ac4ebSTom Herbert err = -EALREADY; 1601ab7ac4ebSTom Herbert break; 1602ab7ac4ebSTom Herbert } 1603ab7ac4ebSTom Herbert 1604ab7ac4ebSTom Herbert psock->unattaching = 1; 1605ab7ac4ebSTom Herbert 1606ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1607ab7ac4ebSTom Herbert 16089b73896aSTom Herbert /* Lower socket lock should already be held */ 1609ab7ac4ebSTom Herbert kcm_unattach(psock); 1610ab7ac4ebSTom Herbert 1611ab7ac4ebSTom Herbert err = 0; 1612ab7ac4ebSTom Herbert goto out; 1613ab7ac4ebSTom Herbert } 1614ab7ac4ebSTom Herbert 1615ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1616ab7ac4ebSTom Herbert 1617ab7ac4ebSTom Herbert out: 1618ab7ac4ebSTom Herbert fput(csock->file); 1619ab7ac4ebSTom Herbert return err; 1620ab7ac4ebSTom Herbert } 1621ab7ac4ebSTom Herbert 1622ab7ac4ebSTom Herbert static struct proto kcm_proto = { 1623ab7ac4ebSTom Herbert .name = "KCM", 1624ab7ac4ebSTom Herbert .owner = THIS_MODULE, 1625ab7ac4ebSTom Herbert .obj_size = sizeof(struct kcm_sock), 1626ab7ac4ebSTom Herbert }; 1627ab7ac4ebSTom Herbert 1628ab7ac4ebSTom Herbert /* Clone a kcm socket. */ 1629ab7ac4ebSTom Herbert static int kcm_clone(struct socket *osock, struct kcm_clone *info, 1630ab7ac4ebSTom Herbert struct socket **newsockp) 1631ab7ac4ebSTom Herbert { 1632ab7ac4ebSTom Herbert struct socket *newsock; 1633ab7ac4ebSTom Herbert struct sock *newsk; 1634ab7ac4ebSTom Herbert struct file *newfile; 1635ab7ac4ebSTom Herbert int err, newfd; 1636ab7ac4ebSTom Herbert 1637ab7ac4ebSTom Herbert err = -ENFILE; 1638ab7ac4ebSTom Herbert newsock = sock_alloc(); 1639ab7ac4ebSTom Herbert if (!newsock) 1640ab7ac4ebSTom Herbert goto out; 1641ab7ac4ebSTom Herbert 1642ab7ac4ebSTom Herbert newsock->type = osock->type; 1643ab7ac4ebSTom Herbert newsock->ops = osock->ops; 1644ab7ac4ebSTom Herbert 1645ab7ac4ebSTom Herbert __module_get(newsock->ops->owner); 1646ab7ac4ebSTom Herbert 1647ab7ac4ebSTom Herbert newfd = get_unused_fd_flags(0); 1648ab7ac4ebSTom Herbert if (unlikely(newfd < 0)) { 1649ab7ac4ebSTom Herbert err = newfd; 1650ab7ac4ebSTom Herbert goto out_fd_fail; 1651ab7ac4ebSTom Herbert } 1652ab7ac4ebSTom Herbert 1653ab7ac4ebSTom Herbert newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); 1654ab7ac4ebSTom Herbert if (unlikely(IS_ERR(newfile))) { 1655ab7ac4ebSTom Herbert err = PTR_ERR(newfile); 1656ab7ac4ebSTom Herbert goto out_sock_alloc_fail; 1657ab7ac4ebSTom Herbert } 1658ab7ac4ebSTom Herbert 1659ab7ac4ebSTom Herbert newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, 1660ab7ac4ebSTom Herbert &kcm_proto, true); 1661ab7ac4ebSTom Herbert if (!newsk) { 1662ab7ac4ebSTom Herbert err = -ENOMEM; 1663ab7ac4ebSTom Herbert goto out_sk_alloc_fail; 1664ab7ac4ebSTom Herbert } 1665ab7ac4ebSTom Herbert 1666ab7ac4ebSTom Herbert sock_init_data(newsock, newsk); 1667ab7ac4ebSTom Herbert init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); 1668ab7ac4ebSTom Herbert 1669ab7ac4ebSTom Herbert fd_install(newfd, newfile); 1670ab7ac4ebSTom Herbert *newsockp = newsock; 1671ab7ac4ebSTom Herbert info->fd = newfd; 1672ab7ac4ebSTom Herbert 1673ab7ac4ebSTom Herbert return 0; 1674ab7ac4ebSTom Herbert 1675ab7ac4ebSTom Herbert out_sk_alloc_fail: 1676ab7ac4ebSTom Herbert fput(newfile); 1677ab7ac4ebSTom Herbert out_sock_alloc_fail: 1678ab7ac4ebSTom Herbert put_unused_fd(newfd); 1679ab7ac4ebSTom Herbert out_fd_fail: 1680ab7ac4ebSTom Herbert sock_release(newsock); 1681ab7ac4ebSTom Herbert out: 1682ab7ac4ebSTom Herbert return err; 1683ab7ac4ebSTom Herbert } 1684ab7ac4ebSTom Herbert 1685ab7ac4ebSTom Herbert static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1686ab7ac4ebSTom Herbert { 1687ab7ac4ebSTom Herbert int err; 1688ab7ac4ebSTom Herbert 1689ab7ac4ebSTom Herbert switch (cmd) { 1690ab7ac4ebSTom Herbert case SIOCKCMATTACH: { 1691ab7ac4ebSTom Herbert struct kcm_attach info; 1692ab7ac4ebSTom Herbert 1693ab7ac4ebSTom Herbert if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 1694a80db69eSWANG Cong return -EFAULT; 1695ab7ac4ebSTom Herbert 1696ab7ac4ebSTom Herbert err = kcm_attach_ioctl(sock, &info); 1697ab7ac4ebSTom Herbert 1698ab7ac4ebSTom Herbert break; 1699ab7ac4ebSTom Herbert } 1700ab7ac4ebSTom Herbert case SIOCKCMUNATTACH: { 1701ab7ac4ebSTom Herbert struct kcm_unattach info; 1702ab7ac4ebSTom Herbert 1703ab7ac4ebSTom Herbert if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 1704a80db69eSWANG Cong return -EFAULT; 1705ab7ac4ebSTom Herbert 1706ab7ac4ebSTom Herbert err = kcm_unattach_ioctl(sock, &info); 1707ab7ac4ebSTom Herbert 1708ab7ac4ebSTom Herbert break; 1709ab7ac4ebSTom Herbert } 1710ab7ac4ebSTom Herbert case SIOCKCMCLONE: { 1711ab7ac4ebSTom Herbert struct kcm_clone info; 1712ab7ac4ebSTom Herbert struct socket *newsock = NULL; 1713ab7ac4ebSTom Herbert 1714ab7ac4ebSTom Herbert err = kcm_clone(sock, &info, &newsock); 1715ab7ac4ebSTom Herbert if (!err) { 1716ab7ac4ebSTom Herbert if (copy_to_user((void __user *)arg, &info, 1717ab7ac4ebSTom Herbert sizeof(info))) { 1718ab7ac4ebSTom Herbert err = -EFAULT; 1719c0338affSWANG Cong sys_close(info.fd); 1720ab7ac4ebSTom Herbert } 1721ab7ac4ebSTom Herbert } 1722ab7ac4ebSTom Herbert 1723ab7ac4ebSTom Herbert break; 1724ab7ac4ebSTom Herbert } 1725ab7ac4ebSTom Herbert default: 1726ab7ac4ebSTom Herbert err = -ENOIOCTLCMD; 1727ab7ac4ebSTom Herbert break; 1728ab7ac4ebSTom Herbert } 1729ab7ac4ebSTom Herbert 1730ab7ac4ebSTom Herbert return err; 1731ab7ac4ebSTom Herbert } 1732ab7ac4ebSTom Herbert 1733ab7ac4ebSTom Herbert static void free_mux(struct rcu_head *rcu) 1734ab7ac4ebSTom Herbert { 1735ab7ac4ebSTom Herbert struct kcm_mux *mux = container_of(rcu, 1736ab7ac4ebSTom Herbert struct kcm_mux, rcu); 1737ab7ac4ebSTom Herbert 1738ab7ac4ebSTom Herbert kmem_cache_free(kcm_muxp, mux); 1739ab7ac4ebSTom Herbert } 1740ab7ac4ebSTom Herbert 1741ab7ac4ebSTom Herbert static void release_mux(struct kcm_mux *mux) 1742ab7ac4ebSTom Herbert { 1743ab7ac4ebSTom Herbert struct kcm_net *knet = mux->knet; 1744ab7ac4ebSTom Herbert struct kcm_psock *psock, *tmp_psock; 1745ab7ac4ebSTom Herbert 1746ab7ac4ebSTom Herbert /* Release psocks */ 1747ab7ac4ebSTom Herbert list_for_each_entry_safe(psock, tmp_psock, 1748ab7ac4ebSTom Herbert &mux->psocks, psock_list) { 17491616b38fSTom Herbert if (!WARN_ON(psock->unattaching)) 1750ab7ac4ebSTom Herbert kcm_unattach(psock); 1751ab7ac4ebSTom Herbert } 1752ab7ac4ebSTom Herbert 1753ab7ac4ebSTom Herbert if (WARN_ON(mux->psocks_cnt)) 1754ab7ac4ebSTom Herbert return; 1755ab7ac4ebSTom Herbert 1756ab7ac4ebSTom Herbert __skb_queue_purge(&mux->rx_hold_queue); 1757ab7ac4ebSTom Herbert 1758ab7ac4ebSTom Herbert mutex_lock(&knet->mutex); 1759cd6e111bSTom Herbert aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); 1760cd6e111bSTom Herbert aggregate_psock_stats(&mux->aggregate_psock_stats, 1761cd6e111bSTom Herbert &knet->aggregate_psock_stats); 17629b73896aSTom Herbert aggregate_strp_stats(&mux->aggregate_strp_stats, 17639b73896aSTom Herbert &knet->aggregate_strp_stats); 1764ab7ac4ebSTom Herbert list_del_rcu(&mux->kcm_mux_list); 1765ab7ac4ebSTom Herbert knet->count--; 1766ab7ac4ebSTom Herbert mutex_unlock(&knet->mutex); 1767ab7ac4ebSTom Herbert 1768ab7ac4ebSTom Herbert call_rcu(&mux->rcu, free_mux); 1769ab7ac4ebSTom Herbert } 1770ab7ac4ebSTom Herbert 1771ab7ac4ebSTom Herbert static void kcm_done(struct kcm_sock *kcm) 1772ab7ac4ebSTom Herbert { 1773ab7ac4ebSTom Herbert struct kcm_mux *mux = kcm->mux; 1774ab7ac4ebSTom Herbert struct sock *sk = &kcm->sk; 1775ab7ac4ebSTom Herbert int socks_cnt; 1776ab7ac4ebSTom Herbert 1777ab7ac4ebSTom Herbert spin_lock_bh(&mux->rx_lock); 1778ab7ac4ebSTom Herbert if (kcm->rx_psock) { 1779ab7ac4ebSTom Herbert /* Cleanup in unreserve_rx_kcm */ 1780ab7ac4ebSTom Herbert WARN_ON(kcm->done); 1781ab7ac4ebSTom Herbert kcm->rx_disabled = 1; 1782ab7ac4ebSTom Herbert kcm->done = 1; 1783ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1784ab7ac4ebSTom Herbert return; 1785ab7ac4ebSTom Herbert } 1786ab7ac4ebSTom Herbert 1787ab7ac4ebSTom Herbert if (kcm->rx_wait) { 1788ab7ac4ebSTom Herbert list_del(&kcm->wait_rx_list); 1789ab7ac4ebSTom Herbert kcm->rx_wait = false; 1790ab7ac4ebSTom Herbert } 1791ab7ac4ebSTom Herbert /* Move any pending receive messages to other kcm sockets */ 1792ab7ac4ebSTom Herbert requeue_rx_msgs(mux, &sk->sk_receive_queue); 1793ab7ac4ebSTom Herbert 1794ab7ac4ebSTom Herbert spin_unlock_bh(&mux->rx_lock); 1795ab7ac4ebSTom Herbert 1796ab7ac4ebSTom Herbert if (WARN_ON(sk_rmem_alloc_get(sk))) 1797ab7ac4ebSTom Herbert return; 1798ab7ac4ebSTom Herbert 1799ab7ac4ebSTom Herbert /* Detach from MUX */ 1800ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1801ab7ac4ebSTom Herbert 1802ab7ac4ebSTom Herbert list_del(&kcm->kcm_sock_list); 1803ab7ac4ebSTom Herbert mux->kcm_socks_cnt--; 1804ab7ac4ebSTom Herbert socks_cnt = mux->kcm_socks_cnt; 1805ab7ac4ebSTom Herbert 1806ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1807ab7ac4ebSTom Herbert 1808ab7ac4ebSTom Herbert if (!socks_cnt) { 1809ab7ac4ebSTom Herbert /* We are done with the mux now. */ 1810ab7ac4ebSTom Herbert release_mux(mux); 1811ab7ac4ebSTom Herbert } 1812ab7ac4ebSTom Herbert 1813ab7ac4ebSTom Herbert WARN_ON(kcm->rx_wait); 1814ab7ac4ebSTom Herbert 1815ab7ac4ebSTom Herbert sock_put(&kcm->sk); 1816ab7ac4ebSTom Herbert } 1817ab7ac4ebSTom Herbert 1818ab7ac4ebSTom Herbert /* Called by kcm_release to close a KCM socket. 1819ab7ac4ebSTom Herbert * If this is the last KCM socket on the MUX, destroy the MUX. 1820ab7ac4ebSTom Herbert */ 1821ab7ac4ebSTom Herbert static int kcm_release(struct socket *sock) 1822ab7ac4ebSTom Herbert { 1823ab7ac4ebSTom Herbert struct sock *sk = sock->sk; 1824ab7ac4ebSTom Herbert struct kcm_sock *kcm; 1825ab7ac4ebSTom Herbert struct kcm_mux *mux; 1826ab7ac4ebSTom Herbert struct kcm_psock *psock; 1827ab7ac4ebSTom Herbert 1828ab7ac4ebSTom Herbert if (!sk) 1829ab7ac4ebSTom Herbert return 0; 1830ab7ac4ebSTom Herbert 1831ab7ac4ebSTom Herbert kcm = kcm_sk(sk); 1832ab7ac4ebSTom Herbert mux = kcm->mux; 1833ab7ac4ebSTom Herbert 1834ab7ac4ebSTom Herbert sock_orphan(sk); 1835ab7ac4ebSTom Herbert kfree_skb(kcm->seq_skb); 1836ab7ac4ebSTom Herbert 1837ab7ac4ebSTom Herbert lock_sock(sk); 1838ab7ac4ebSTom Herbert /* Purge queue under lock to avoid race condition with tx_work trying 1839ab7ac4ebSTom Herbert * to act when queue is nonempty. If tx_work runs after this point 1840ab7ac4ebSTom Herbert * it will just return. 1841ab7ac4ebSTom Herbert */ 1842ab7ac4ebSTom Herbert __skb_queue_purge(&sk->sk_write_queue); 18439b73896aSTom Herbert 18449b73896aSTom Herbert /* Set tx_stopped. This is checked when psock is bound to a kcm and we 18459b73896aSTom Herbert * get a writespace callback. This prevents further work being queued 18469b73896aSTom Herbert * from the callback (unbinding the psock occurs after canceling work. 18479b73896aSTom Herbert */ 18489b73896aSTom Herbert kcm->tx_stopped = 1; 18499b73896aSTom Herbert 1850ab7ac4ebSTom Herbert release_sock(sk); 1851ab7ac4ebSTom Herbert 1852ab7ac4ebSTom Herbert spin_lock_bh(&mux->lock); 1853ab7ac4ebSTom Herbert if (kcm->tx_wait) { 1854ab7ac4ebSTom Herbert /* Take of tx_wait list, after this point there should be no way 1855ab7ac4ebSTom Herbert * that a psock will be assigned to this kcm. 1856ab7ac4ebSTom Herbert */ 1857ab7ac4ebSTom Herbert list_del(&kcm->wait_psock_list); 1858ab7ac4ebSTom Herbert kcm->tx_wait = false; 1859ab7ac4ebSTom Herbert } 1860ab7ac4ebSTom Herbert spin_unlock_bh(&mux->lock); 1861ab7ac4ebSTom Herbert 1862ab7ac4ebSTom Herbert /* Cancel work. After this point there should be no outside references 1863ab7ac4ebSTom Herbert * to the kcm socket. 1864ab7ac4ebSTom Herbert */ 1865ab7ac4ebSTom Herbert cancel_work_sync(&kcm->tx_work); 1866ab7ac4ebSTom Herbert 1867ab7ac4ebSTom Herbert lock_sock(sk); 1868ab7ac4ebSTom Herbert psock = kcm->tx_psock; 1869ab7ac4ebSTom Herbert if (psock) { 1870ab7ac4ebSTom Herbert /* A psock was reserved, so we need to kill it since it 1871ab7ac4ebSTom Herbert * may already have some bytes queued from a message. We 1872ab7ac4ebSTom Herbert * need to do this after removing kcm from tx_wait list. 1873ab7ac4ebSTom Herbert */ 1874ab7ac4ebSTom Herbert kcm_abort_tx_psock(psock, EPIPE, false); 1875ab7ac4ebSTom Herbert unreserve_psock(kcm); 1876ab7ac4ebSTom Herbert } 1877ab7ac4ebSTom Herbert release_sock(sk); 1878ab7ac4ebSTom Herbert 1879ab7ac4ebSTom Herbert WARN_ON(kcm->tx_wait); 1880ab7ac4ebSTom Herbert WARN_ON(kcm->tx_psock); 1881ab7ac4ebSTom Herbert 1882ab7ac4ebSTom Herbert sock->sk = NULL; 1883ab7ac4ebSTom Herbert 1884ab7ac4ebSTom Herbert kcm_done(kcm); 1885ab7ac4ebSTom Herbert 1886ab7ac4ebSTom Herbert return 0; 1887ab7ac4ebSTom Herbert } 1888ab7ac4ebSTom Herbert 188991687355STom Herbert static const struct proto_ops kcm_dgram_ops = { 1890ab7ac4ebSTom Herbert .family = PF_KCM, 1891ab7ac4ebSTom Herbert .owner = THIS_MODULE, 1892ab7ac4ebSTom Herbert .release = kcm_release, 1893ab7ac4ebSTom Herbert .bind = sock_no_bind, 1894ab7ac4ebSTom Herbert .connect = sock_no_connect, 1895ab7ac4ebSTom Herbert .socketpair = sock_no_socketpair, 1896ab7ac4ebSTom Herbert .accept = sock_no_accept, 1897ab7ac4ebSTom Herbert .getname = sock_no_getname, 1898ab7ac4ebSTom Herbert .poll = datagram_poll, 1899ab7ac4ebSTom Herbert .ioctl = kcm_ioctl, 1900ab7ac4ebSTom Herbert .listen = sock_no_listen, 1901ab7ac4ebSTom Herbert .shutdown = sock_no_shutdown, 1902ab7ac4ebSTom Herbert .setsockopt = kcm_setsockopt, 1903ab7ac4ebSTom Herbert .getsockopt = kcm_getsockopt, 1904ab7ac4ebSTom Herbert .sendmsg = kcm_sendmsg, 1905ab7ac4ebSTom Herbert .recvmsg = kcm_recvmsg, 1906ab7ac4ebSTom Herbert .mmap = sock_no_mmap, 1907f29698fcSTom Herbert .sendpage = kcm_sendpage, 1908ab7ac4ebSTom Herbert }; 1909ab7ac4ebSTom Herbert 191091687355STom Herbert static const struct proto_ops kcm_seqpacket_ops = { 191191687355STom Herbert .family = PF_KCM, 191291687355STom Herbert .owner = THIS_MODULE, 191391687355STom Herbert .release = kcm_release, 191491687355STom Herbert .bind = sock_no_bind, 191591687355STom Herbert .connect = sock_no_connect, 191691687355STom Herbert .socketpair = sock_no_socketpair, 191791687355STom Herbert .accept = sock_no_accept, 191891687355STom Herbert .getname = sock_no_getname, 191991687355STom Herbert .poll = datagram_poll, 192091687355STom Herbert .ioctl = kcm_ioctl, 192191687355STom Herbert .listen = sock_no_listen, 192291687355STom Herbert .shutdown = sock_no_shutdown, 192391687355STom Herbert .setsockopt = kcm_setsockopt, 192491687355STom Herbert .getsockopt = kcm_getsockopt, 192591687355STom Herbert .sendmsg = kcm_sendmsg, 192691687355STom Herbert .recvmsg = kcm_recvmsg, 192791687355STom Herbert .mmap = sock_no_mmap, 1928f29698fcSTom Herbert .sendpage = kcm_sendpage, 192991687355STom Herbert .splice_read = kcm_splice_read, 193091687355STom Herbert }; 193191687355STom Herbert 1932ab7ac4ebSTom Herbert /* Create proto operation for kcm sockets */ 1933ab7ac4ebSTom Herbert static int kcm_create(struct net *net, struct socket *sock, 1934ab7ac4ebSTom Herbert int protocol, int kern) 1935ab7ac4ebSTom Herbert { 1936ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 1937ab7ac4ebSTom Herbert struct sock *sk; 1938ab7ac4ebSTom Herbert struct kcm_mux *mux; 1939ab7ac4ebSTom Herbert 1940ab7ac4ebSTom Herbert switch (sock->type) { 1941ab7ac4ebSTom Herbert case SOCK_DGRAM: 194291687355STom Herbert sock->ops = &kcm_dgram_ops; 194391687355STom Herbert break; 1944ab7ac4ebSTom Herbert case SOCK_SEQPACKET: 194591687355STom Herbert sock->ops = &kcm_seqpacket_ops; 1946ab7ac4ebSTom Herbert break; 1947ab7ac4ebSTom Herbert default: 1948ab7ac4ebSTom Herbert return -ESOCKTNOSUPPORT; 1949ab7ac4ebSTom Herbert } 1950ab7ac4ebSTom Herbert 1951ab7ac4ebSTom Herbert if (protocol != KCMPROTO_CONNECTED) 1952ab7ac4ebSTom Herbert return -EPROTONOSUPPORT; 1953ab7ac4ebSTom Herbert 1954ab7ac4ebSTom Herbert sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); 1955ab7ac4ebSTom Herbert if (!sk) 1956ab7ac4ebSTom Herbert return -ENOMEM; 1957ab7ac4ebSTom Herbert 1958ab7ac4ebSTom Herbert /* Allocate a kcm mux, shared between KCM sockets */ 1959ab7ac4ebSTom Herbert mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); 1960ab7ac4ebSTom Herbert if (!mux) { 1961ab7ac4ebSTom Herbert sk_free(sk); 1962ab7ac4ebSTom Herbert return -ENOMEM; 1963ab7ac4ebSTom Herbert } 1964ab7ac4ebSTom Herbert 1965ab7ac4ebSTom Herbert spin_lock_init(&mux->lock); 1966ab7ac4ebSTom Herbert spin_lock_init(&mux->rx_lock); 1967ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_socks); 1968ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_rx_waiters); 1969ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->kcm_tx_waiters); 1970ab7ac4ebSTom Herbert 1971ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks); 1972ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks_ready); 1973ab7ac4ebSTom Herbert INIT_LIST_HEAD(&mux->psocks_avail); 1974ab7ac4ebSTom Herbert 1975ab7ac4ebSTom Herbert mux->knet = knet; 1976ab7ac4ebSTom Herbert 1977ab7ac4ebSTom Herbert /* Add new MUX to list */ 1978ab7ac4ebSTom Herbert mutex_lock(&knet->mutex); 1979ab7ac4ebSTom Herbert list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); 1980ab7ac4ebSTom Herbert knet->count++; 1981ab7ac4ebSTom Herbert mutex_unlock(&knet->mutex); 1982ab7ac4ebSTom Herbert 1983ab7ac4ebSTom Herbert skb_queue_head_init(&mux->rx_hold_queue); 1984ab7ac4ebSTom Herbert 1985ab7ac4ebSTom Herbert /* Init KCM socket */ 1986ab7ac4ebSTom Herbert sock_init_data(sock, sk); 1987ab7ac4ebSTom Herbert init_kcm_sock(kcm_sk(sk), mux); 1988ab7ac4ebSTom Herbert 1989ab7ac4ebSTom Herbert return 0; 1990ab7ac4ebSTom Herbert } 1991ab7ac4ebSTom Herbert 1992173e7837Slinzhang static const struct net_proto_family kcm_family_ops = { 1993ab7ac4ebSTom Herbert .family = PF_KCM, 1994ab7ac4ebSTom Herbert .create = kcm_create, 1995ab7ac4ebSTom Herbert .owner = THIS_MODULE, 1996ab7ac4ebSTom Herbert }; 1997ab7ac4ebSTom Herbert 1998ab7ac4ebSTom Herbert static __net_init int kcm_init_net(struct net *net) 1999ab7ac4ebSTom Herbert { 2000ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 2001ab7ac4ebSTom Herbert 2002ab7ac4ebSTom Herbert INIT_LIST_HEAD_RCU(&knet->mux_list); 2003ab7ac4ebSTom Herbert mutex_init(&knet->mutex); 2004ab7ac4ebSTom Herbert 2005ab7ac4ebSTom Herbert return 0; 2006ab7ac4ebSTom Herbert } 2007ab7ac4ebSTom Herbert 2008ab7ac4ebSTom Herbert static __net_exit void kcm_exit_net(struct net *net) 2009ab7ac4ebSTom Herbert { 2010ab7ac4ebSTom Herbert struct kcm_net *knet = net_generic(net, kcm_net_id); 2011ab7ac4ebSTom Herbert 2012ab7ac4ebSTom Herbert /* All KCM sockets should be closed at this point, which should mean 2013ab7ac4ebSTom Herbert * that all multiplexors and psocks have been destroyed. 2014ab7ac4ebSTom Herbert */ 2015ab7ac4ebSTom Herbert WARN_ON(!list_empty(&knet->mux_list)); 2016ab7ac4ebSTom Herbert } 2017ab7ac4ebSTom Herbert 2018ab7ac4ebSTom Herbert static struct pernet_operations kcm_net_ops = { 2019ab7ac4ebSTom Herbert .init = kcm_init_net, 2020ab7ac4ebSTom Herbert .exit = kcm_exit_net, 2021ab7ac4ebSTom Herbert .id = &kcm_net_id, 2022ab7ac4ebSTom Herbert .size = sizeof(struct kcm_net), 2023ab7ac4ebSTom Herbert }; 2024ab7ac4ebSTom Herbert 2025ab7ac4ebSTom Herbert static int __init kcm_init(void) 2026ab7ac4ebSTom Herbert { 2027ab7ac4ebSTom Herbert int err = -ENOMEM; 2028ab7ac4ebSTom Herbert 2029ab7ac4ebSTom Herbert kcm_muxp = kmem_cache_create("kcm_mux_cache", 2030ab7ac4ebSTom Herbert sizeof(struct kcm_mux), 0, 2031ab7ac4ebSTom Herbert SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2032ab7ac4ebSTom Herbert if (!kcm_muxp) 2033ab7ac4ebSTom Herbert goto fail; 2034ab7ac4ebSTom Herbert 2035ab7ac4ebSTom Herbert kcm_psockp = kmem_cache_create("kcm_psock_cache", 2036ab7ac4ebSTom Herbert sizeof(struct kcm_psock), 0, 2037ab7ac4ebSTom Herbert SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2038ab7ac4ebSTom Herbert if (!kcm_psockp) 2039ab7ac4ebSTom Herbert goto fail; 2040ab7ac4ebSTom Herbert 2041ab7ac4ebSTom Herbert kcm_wq = create_singlethread_workqueue("kkcmd"); 2042ab7ac4ebSTom Herbert if (!kcm_wq) 2043ab7ac4ebSTom Herbert goto fail; 2044ab7ac4ebSTom Herbert 2045ab7ac4ebSTom Herbert err = proto_register(&kcm_proto, 1); 2046ab7ac4ebSTom Herbert if (err) 2047ab7ac4ebSTom Herbert goto fail; 2048ab7ac4ebSTom Herbert 2049ab7ac4ebSTom Herbert err = sock_register(&kcm_family_ops); 2050ab7ac4ebSTom Herbert if (err) 2051ab7ac4ebSTom Herbert goto sock_register_fail; 2052ab7ac4ebSTom Herbert 2053ab7ac4ebSTom Herbert err = register_pernet_device(&kcm_net_ops); 2054ab7ac4ebSTom Herbert if (err) 2055ab7ac4ebSTom Herbert goto net_ops_fail; 2056ab7ac4ebSTom Herbert 2057cd6e111bSTom Herbert err = kcm_proc_init(); 2058cd6e111bSTom Herbert if (err) 2059cd6e111bSTom Herbert goto proc_init_fail; 2060cd6e111bSTom Herbert 2061ab7ac4ebSTom Herbert return 0; 2062ab7ac4ebSTom Herbert 2063cd6e111bSTom Herbert proc_init_fail: 2064cd6e111bSTom Herbert unregister_pernet_device(&kcm_net_ops); 2065cd6e111bSTom Herbert 2066ab7ac4ebSTom Herbert net_ops_fail: 2067ab7ac4ebSTom Herbert sock_unregister(PF_KCM); 2068ab7ac4ebSTom Herbert 2069ab7ac4ebSTom Herbert sock_register_fail: 2070ab7ac4ebSTom Herbert proto_unregister(&kcm_proto); 2071ab7ac4ebSTom Herbert 2072ab7ac4ebSTom Herbert fail: 2073ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_muxp); 2074ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_psockp); 2075ab7ac4ebSTom Herbert 2076ab7ac4ebSTom Herbert if (kcm_wq) 2077ab7ac4ebSTom Herbert destroy_workqueue(kcm_wq); 2078ab7ac4ebSTom Herbert 2079ab7ac4ebSTom Herbert return err; 2080ab7ac4ebSTom Herbert } 2081ab7ac4ebSTom Herbert 2082ab7ac4ebSTom Herbert static void __exit kcm_exit(void) 2083ab7ac4ebSTom Herbert { 2084cd6e111bSTom Herbert kcm_proc_exit(); 2085ab7ac4ebSTom Herbert unregister_pernet_device(&kcm_net_ops); 2086ab7ac4ebSTom Herbert sock_unregister(PF_KCM); 2087ab7ac4ebSTom Herbert proto_unregister(&kcm_proto); 2088ab7ac4ebSTom Herbert destroy_workqueue(kcm_wq); 2089ab7ac4ebSTom Herbert 2090ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_muxp); 2091ab7ac4ebSTom Herbert kmem_cache_destroy(kcm_psockp); 2092ab7ac4ebSTom Herbert } 2093ab7ac4ebSTom Herbert 2094ab7ac4ebSTom Herbert module_init(kcm_init); 2095ab7ac4ebSTom Herbert module_exit(kcm_exit); 2096ab7ac4ebSTom Herbert 2097ab7ac4ebSTom Herbert MODULE_LICENSE("GPL"); 2098ab7ac4ebSTom Herbert MODULE_ALIAS_NETPROTO(PF_KCM); 2099ab7ac4ebSTom Herbert 2100