11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Definitions for the AF_INET socket handler. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Version: @(#)sock.h 1.0.4 05/13/93 91da177e4SLinus Torvalds * 1002c30a84SJesper Juhl * Authors: Ross Biro 111da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 121da177e4SLinus Torvalds * Corey Minyard <wf-rch!minyard@relay.EU.net> 131da177e4SLinus Torvalds * Florian La Roche <flla@stud.uni-sb.de> 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * Fixes: 161da177e4SLinus Torvalds * Alan Cox : Volatiles in skbuff pointers. See 171da177e4SLinus Torvalds * skbuff comments. May be overdone, 181da177e4SLinus Torvalds * better to prove they can be removed 191da177e4SLinus Torvalds * than the reverse. 201da177e4SLinus Torvalds * Alan Cox : Added a zapped field for tcp to note 211da177e4SLinus Torvalds * a socket is reset and must stay shut up 221da177e4SLinus Torvalds * Alan Cox : New fields for options 231da177e4SLinus Torvalds * Pauline Middelink : identd support 241da177e4SLinus Torvalds * Alan Cox : Eliminate low level recv/recvfrom 251da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 261da177e4SLinus Torvalds * Steve Whitehouse: Default routines for sock_ops 271da177e4SLinus Torvalds * Arnaldo C. 
Melo : removed net_pinfo, tp_pinfo and made 281da177e4SLinus Torvalds * protinfo be just a void pointer, as the 291da177e4SLinus Torvalds * protocol specific parts were moved to 301da177e4SLinus Torvalds * respective headers and ipv4/v6, etc now 311da177e4SLinus Torvalds * use private slabcaches for its socks 321da177e4SLinus Torvalds * Pedro Hortas : New flags field for socket options 331da177e4SLinus Torvalds * 341da177e4SLinus Torvalds * 351da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 361da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 371da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 381da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 391da177e4SLinus Torvalds */ 401da177e4SLinus Torvalds #ifndef _SOCK_H 411da177e4SLinus Torvalds #define _SOCK_H 421da177e4SLinus Torvalds 43172589ccSIlpo Järvinen #include <linux/kernel.h> 441da177e4SLinus Torvalds #include <linux/list.h> 4588ab1932SEric Dumazet #include <linux/list_nulls.h> 461da177e4SLinus Torvalds #include <linux/timer.h> 471da177e4SLinus Torvalds #include <linux/cache.h> 481da177e4SLinus Torvalds #include <linux/module.h> 49a5b5bb9aSIngo Molnar #include <linux/lockdep.h> 501da177e4SLinus Torvalds #include <linux/netdevice.h> 511da177e4SLinus Torvalds #include <linux/skbuff.h> /* struct sk_buff */ 52d7fe0f24SAl Viro #include <linux/mm.h> 531da177e4SLinus Torvalds #include <linux/security.h> 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds #include <linux/filter.h> 5688ab1932SEric Dumazet #include <linux/rculist_nulls.h> 57a57de0b4SJiri Olsa #include <linux/poll.h> 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds #include <asm/atomic.h> 601da177e4SLinus Torvalds #include <net/dst.h> 611da177e4SLinus Torvalds #include <net/checksum.h> 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds /* 641da177e4SLinus Torvalds * This structure really needs to be cleaned 
up. 651da177e4SLinus Torvalds * Most of it is for TCP, and not used by any of 661da177e4SLinus Torvalds * the other protocols. 671da177e4SLinus Torvalds */ 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds /* Define this to get the SOCK_DBG debugging facility. */ 701da177e4SLinus Torvalds #define SOCK_DEBUGGING 711da177e4SLinus Torvalds #ifdef SOCK_DEBUGGING 721da177e4SLinus Torvalds #define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \ 731da177e4SLinus Torvalds printk(KERN_DEBUG msg); } while (0) 741da177e4SLinus Torvalds #else 754cd9029dSStephen Hemminger /* Validate arguments and do nothing */ 764cd9029dSStephen Hemminger static void inline int __attribute__ ((format (printf, 2, 3))) 774cd9029dSStephen Hemminger SOCK_DEBUG(struct sock *sk, const char *msg, ...) 784cd9029dSStephen Hemminger { 794cd9029dSStephen Hemminger } 801da177e4SLinus Torvalds #endif 811da177e4SLinus Torvalds 821da177e4SLinus Torvalds /* This is the per-socket lock. The spinlock provides a synchronization 831da177e4SLinus Torvalds * between user contexts and software interrupt processing, whereas the 841da177e4SLinus Torvalds * mini-semaphore synchronizes multiple users amongst themselves. 
851da177e4SLinus Torvalds */ 861da177e4SLinus Torvalds typedef struct { 871da177e4SLinus Torvalds spinlock_t slock; 88d2e9117cSJohn Heffner int owned; 891da177e4SLinus Torvalds wait_queue_head_t wq; 90a5b5bb9aSIngo Molnar /* 91a5b5bb9aSIngo Molnar * We express the mutex-alike socket_lock semantics 92a5b5bb9aSIngo Molnar * to the lock validator by explicitly managing 93a5b5bb9aSIngo Molnar * the slock as a lock variant (in addition to 94a5b5bb9aSIngo Molnar * the slock itself): 95a5b5bb9aSIngo Molnar */ 96a5b5bb9aSIngo Molnar #ifdef CONFIG_DEBUG_LOCK_ALLOC 97a5b5bb9aSIngo Molnar struct lockdep_map dep_map; 98a5b5bb9aSIngo Molnar #endif 991da177e4SLinus Torvalds } socket_lock_t; 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds struct sock; 1028feaf0c0SArnaldo Carvalho de Melo struct proto; 1030eeb8ffcSDenis V. Lunev struct net; 1041da177e4SLinus Torvalds 1051da177e4SLinus Torvalds /** 1061da177e4SLinus Torvalds * struct sock_common - minimal network layer representation of sockets 1074dc6dc71SEric Dumazet * @skc_node: main hash linkage for various protocol lookup tables 108*512615b6SEric Dumazet * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol 1094dc6dc71SEric Dumazet * @skc_refcnt: reference count 110e022f0b4SKrishna Kumar * @skc_tx_queue_mapping: tx queue number for this connection 1114dc6dc71SEric Dumazet * @skc_hash: hash value used with various protocol lookup tables 112d4cada4aSEric Dumazet * @skc_u16hashes: two u16 hash values used by UDP lookup tables 1134dc3b16bSPavel Pisa * @skc_family: network address family 1144dc3b16bSPavel Pisa * @skc_state: Connection state 1154dc3b16bSPavel Pisa * @skc_reuse: %SO_REUSEADDR setting 1164dc3b16bSPavel Pisa * @skc_bound_dev_if: bound device index if != 0 1174dc3b16bSPavel Pisa * @skc_bind_node: bind hash linkage for various protocol lookup tables 118*512615b6SEric Dumazet * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol 1198feaf0c0SArnaldo Carvalho de Melo * @skc_prot: protocol 
handlers inside a network family 12007feaebfSEric W. Biederman * @skc_net: reference to the network namespace of this socket 1211da177e4SLinus Torvalds * 1221da177e4SLinus Torvalds * This is the minimal network layer representation of sockets, the header 1238feaf0c0SArnaldo Carvalho de Melo * for struct sock and struct inet_timewait_sock. 1241da177e4SLinus Torvalds */ 1251da177e4SLinus Torvalds struct sock_common { 1264dc6dc71SEric Dumazet /* 1274dc6dc71SEric Dumazet * first fields are not copied in sock_copy() 1284dc6dc71SEric Dumazet */ 12988ab1932SEric Dumazet union { 1301da177e4SLinus Torvalds struct hlist_node skc_node; 13188ab1932SEric Dumazet struct hlist_nulls_node skc_nulls_node; 13288ab1932SEric Dumazet }; 1331da177e4SLinus Torvalds atomic_t skc_refcnt; 134e022f0b4SKrishna Kumar int skc_tx_queue_mapping; 1354dc6dc71SEric Dumazet 136d4cada4aSEric Dumazet union { 13781c3d547SEric Dumazet unsigned int skc_hash; 138d4cada4aSEric Dumazet __u16 skc_u16hashes[2]; 139d4cada4aSEric Dumazet }; 1404dc6dc71SEric Dumazet unsigned short skc_family; 1414dc6dc71SEric Dumazet volatile unsigned char skc_state; 1424dc6dc71SEric Dumazet unsigned char skc_reuse; 1434dc6dc71SEric Dumazet int skc_bound_dev_if; 144*512615b6SEric Dumazet union { 1454dc6dc71SEric Dumazet struct hlist_node skc_bind_node; 146*512615b6SEric Dumazet struct hlist_nulls_node skc_portaddr_node; 147*512615b6SEric Dumazet }; 1488feaf0c0SArnaldo Carvalho de Melo struct proto *skc_prot; 1493b1e0a65SYOSHIFUJI Hideaki #ifdef CONFIG_NET_NS 15007feaebfSEric W. 
Biederman struct net *skc_net; 1513b1e0a65SYOSHIFUJI Hideaki #endif 1521da177e4SLinus Torvalds }; 1531da177e4SLinus Torvalds 1541da177e4SLinus Torvalds /** 1551da177e4SLinus Torvalds * struct sock - network layer representation of sockets 1568feaf0c0SArnaldo Carvalho de Melo * @__sk_common: shared layout with inet_timewait_sock 1574dc3b16bSPavel Pisa * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 1584dc3b16bSPavel Pisa * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 1594dc3b16bSPavel Pisa * @sk_lock: synchronizer 1604dc3b16bSPavel Pisa * @sk_rcvbuf: size of receive buffer in bytes 1614dc3b16bSPavel Pisa * @sk_sleep: sock wait queue 1624dc3b16bSPavel Pisa * @sk_dst_cache: destination cache 1634dc3b16bSPavel Pisa * @sk_dst_lock: destination cache lock 1644dc3b16bSPavel Pisa * @sk_policy: flow policy 1654dc3b16bSPavel Pisa * @sk_rmem_alloc: receive queue bytes committed 1664dc3b16bSPavel Pisa * @sk_receive_queue: incoming packets 1674dc3b16bSPavel Pisa * @sk_wmem_alloc: transmit queue bytes committed 1684dc3b16bSPavel Pisa * @sk_write_queue: Packet sending queue 16997fc2f08SChris Leech * @sk_async_wait_queue: DMA copied packets 1704dc3b16bSPavel Pisa * @sk_omem_alloc: "o" is "option" or "other" 1714dc3b16bSPavel Pisa * @sk_wmem_queued: persistent queue size 1724dc3b16bSPavel Pisa * @sk_forward_alloc: space allocated forward 1734dc3b16bSPavel Pisa * @sk_allocation: allocation mode 1744dc3b16bSPavel Pisa * @sk_sndbuf: size of send buffer in bytes 17533c732c3SWang Chen * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, 17620d49473SPatrick Ohly * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings 1774dc3b16bSPavel Pisa * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets 1784dc3b16bSPavel Pisa * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) 179bcd76111SHerbert Xu * @sk_gso_type: GSO type (e.g. 
%SKB_GSO_TCPV4) 18082cc1a7aSPeter P Waskiewicz Jr * @sk_gso_max_size: Maximum GSO segment size to build 1814dc3b16bSPavel Pisa * @sk_lingertime: %SO_LINGER l_linger setting 1824dc3b16bSPavel Pisa * @sk_backlog: always used with the per-socket spinlock held 1834dc3b16bSPavel Pisa * @sk_callback_lock: used with the callbacks in the end of this struct 1844dc3b16bSPavel Pisa * @sk_error_queue: rarely used 18533c732c3SWang Chen * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, 18633c732c3SWang Chen * IPV6_ADDRFORM for instance) 1874dc3b16bSPavel Pisa * @sk_err: last error 18833c732c3SWang Chen * @sk_err_soft: errors that don't cause failure but are the cause of a 18933c732c3SWang Chen * persistent failure not just 'timed out' 190cb61cb9bSEric Dumazet * @sk_drops: raw/udp drops counter 1914dc3b16bSPavel Pisa * @sk_ack_backlog: current listen backlog 1924dc3b16bSPavel Pisa * @sk_max_ack_backlog: listen backlog set in listen() 1934dc3b16bSPavel Pisa * @sk_priority: %SO_PRIORITY setting 1944dc3b16bSPavel Pisa * @sk_type: socket type (%SOCK_STREAM, etc) 1954dc3b16bSPavel Pisa * @sk_protocol: which protocol this socket belongs in this network family 1964dc3b16bSPavel Pisa * @sk_peercred: %SO_PEERCRED setting 1974dc3b16bSPavel Pisa * @sk_rcvlowat: %SO_RCVLOWAT setting 1984dc3b16bSPavel Pisa * @sk_rcvtimeo: %SO_RCVTIMEO setting 1994dc3b16bSPavel Pisa * @sk_sndtimeo: %SO_SNDTIMEO setting 2004dc3b16bSPavel Pisa * @sk_filter: socket filtering instructions 2014dc3b16bSPavel Pisa * @sk_protinfo: private area, net family specific, when not using slab 2024dc3b16bSPavel Pisa * @sk_timer: sock cleanup timer 2034dc3b16bSPavel Pisa * @sk_stamp: time stamp of last packet received 2044dc3b16bSPavel Pisa * @sk_socket: Identd and reporting IO signals 2054dc3b16bSPavel Pisa * @sk_user_data: RPC layer private data 2064dc3b16bSPavel Pisa * @sk_sndmsg_page: cached page for sendmsg 2074dc3b16bSPavel Pisa * @sk_sndmsg_off: cached offset for sendmsg 2084dc3b16bSPavel Pisa * 
@sk_send_head: front of stuff to transmit 20967be2dd1SMartin Waitz * @sk_security: used by security modules 21031729363SRandy Dunlap * @sk_mark: generic packet mark 2114dc3b16bSPavel Pisa * @sk_write_pending: a write to stream socket waits to start 2124dc3b16bSPavel Pisa * @sk_state_change: callback to indicate change in the state of the sock 2134dc3b16bSPavel Pisa * @sk_data_ready: callback to indicate there is data to be processed 2144dc3b16bSPavel Pisa * @sk_write_space: callback to indicate there is bf sending space available 2154dc3b16bSPavel Pisa * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) 2164dc3b16bSPavel Pisa * @sk_backlog_rcv: callback to process the backlog 2174dc3b16bSPavel Pisa * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 2181da177e4SLinus Torvalds */ 2191da177e4SLinus Torvalds struct sock { 2201da177e4SLinus Torvalds /* 2218feaf0c0SArnaldo Carvalho de Melo * Now struct inet_timewait_sock also uses sock_common, so please just 2221da177e4SLinus Torvalds * don't add nothing before this first member (__sk_common) --acme 2231da177e4SLinus Torvalds */ 2241da177e4SLinus Torvalds struct sock_common __sk_common; 2254dc6dc71SEric Dumazet #define sk_node __sk_common.skc_node 2264dc6dc71SEric Dumazet #define sk_nulls_node __sk_common.skc_nulls_node 2274dc6dc71SEric Dumazet #define sk_refcnt __sk_common.skc_refcnt 228e022f0b4SKrishna Kumar #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping 2294dc6dc71SEric Dumazet 2304dc6dc71SEric Dumazet #define sk_copy_start __sk_common.skc_hash 2314dc6dc71SEric Dumazet #define sk_hash __sk_common.skc_hash 2321da177e4SLinus Torvalds #define sk_family __sk_common.skc_family 2331da177e4SLinus Torvalds #define sk_state __sk_common.skc_state 2341da177e4SLinus Torvalds #define sk_reuse __sk_common.skc_reuse 2351da177e4SLinus Torvalds #define sk_bound_dev_if __sk_common.skc_bound_dev_if 2361da177e4SLinus Torvalds #define sk_bind_node __sk_common.skc_bind_node 
2378feaf0c0SArnaldo Carvalho de Melo #define sk_prot __sk_common.skc_prot 23807feaebfSEric W. Biederman #define sk_net __sk_common.skc_net 239a98b65a3SVegard Nossum kmemcheck_bitfield_begin(flags); 2405fdb9973SEric Dumazet unsigned int sk_shutdown : 2, 2411da177e4SLinus Torvalds sk_no_check : 2, 2425fdb9973SEric Dumazet sk_userlocks : 4, 2435fdb9973SEric Dumazet sk_protocol : 8, 2445fdb9973SEric Dumazet sk_type : 16; 245a98b65a3SVegard Nossum kmemcheck_bitfield_end(flags); 2461da177e4SLinus Torvalds int sk_rcvbuf; 2471da177e4SLinus Torvalds socket_lock_t sk_lock; 248fa438ccfSEric Dumazet /* 249fa438ccfSEric Dumazet * The backlog queue is special, it is always used with 250fa438ccfSEric Dumazet * the per-socket spinlock held and requires low latency 251fa438ccfSEric Dumazet * access. Therefore we special case it's implementation. 252fa438ccfSEric Dumazet */ 253fa438ccfSEric Dumazet struct { 254fa438ccfSEric Dumazet struct sk_buff *head; 255fa438ccfSEric Dumazet struct sk_buff *tail; 256fa438ccfSEric Dumazet } sk_backlog; 2571da177e4SLinus Torvalds wait_queue_head_t *sk_sleep; 2581da177e4SLinus Torvalds struct dst_entry *sk_dst_cache; 259def8b4faSAlexey Dobriyan #ifdef CONFIG_XFRM 2601da177e4SLinus Torvalds struct xfrm_policy *sk_policy[2]; 261def8b4faSAlexey Dobriyan #endif 2621da177e4SLinus Torvalds rwlock_t sk_dst_lock; 2631da177e4SLinus Torvalds atomic_t sk_rmem_alloc; 2641da177e4SLinus Torvalds atomic_t sk_wmem_alloc; 2651da177e4SLinus Torvalds atomic_t sk_omem_alloc; 2664e07a91cSArnaldo Carvalho de Melo int sk_sndbuf; 2671da177e4SLinus Torvalds struct sk_buff_head sk_receive_queue; 2681da177e4SLinus Torvalds struct sk_buff_head sk_write_queue; 26923789824SAlexey Dobriyan #ifdef CONFIG_NET_DMA 27097fc2f08SChris Leech struct sk_buff_head sk_async_wait_queue; 27123789824SAlexey Dobriyan #endif 2721da177e4SLinus Torvalds int sk_wmem_queued; 2731da177e4SLinus Torvalds int sk_forward_alloc; 2747d877f3bSAl Viro gfp_t sk_allocation; 2751da177e4SLinus Torvalds int 
sk_route_caps; 276bcd76111SHerbert Xu int sk_gso_type; 27782cc1a7aSPeter P Waskiewicz Jr unsigned int sk_gso_max_size; 2789932cf95SDavid S. Miller int sk_rcvlowat; 2791da177e4SLinus Torvalds unsigned long sk_flags; 2801da177e4SLinus Torvalds unsigned long sk_lingertime; 2811da177e4SLinus Torvalds struct sk_buff_head sk_error_queue; 282476e19cfSArnaldo Carvalho de Melo struct proto *sk_prot_creator; 2831da177e4SLinus Torvalds rwlock_t sk_callback_lock; 2841da177e4SLinus Torvalds int sk_err, 2851da177e4SLinus Torvalds sk_err_soft; 28633c732c3SWang Chen atomic_t sk_drops; 2871da177e4SLinus Torvalds unsigned short sk_ack_backlog; 2881da177e4SLinus Torvalds unsigned short sk_max_ack_backlog; 2891da177e4SLinus Torvalds __u32 sk_priority; 2901da177e4SLinus Torvalds struct ucred sk_peercred; 2911da177e4SLinus Torvalds long sk_rcvtimeo; 2921da177e4SLinus Torvalds long sk_sndtimeo; 2931da177e4SLinus Torvalds struct sk_filter *sk_filter; 2941da177e4SLinus Torvalds void *sk_protinfo; 2951da177e4SLinus Torvalds struct timer_list sk_timer; 296b7aa0bf7SEric Dumazet ktime_t sk_stamp; 2971da177e4SLinus Torvalds struct socket *sk_socket; 2981da177e4SLinus Torvalds void *sk_user_data; 2991da177e4SLinus Torvalds struct page *sk_sndmsg_page; 3001da177e4SLinus Torvalds struct sk_buff *sk_send_head; 3011da177e4SLinus Torvalds __u32 sk_sndmsg_off; 3021da177e4SLinus Torvalds int sk_write_pending; 303d5f64238SAlexey Dobriyan #ifdef CONFIG_SECURITY 3041da177e4SLinus Torvalds void *sk_security; 305d5f64238SAlexey Dobriyan #endif 3064a19ec58SLaszlo Attila Toth __u32 sk_mark; 3074a19ec58SLaszlo Attila Toth /* XXX 4 bytes hole on 64 bit */ 3081da177e4SLinus Torvalds void (*sk_state_change)(struct sock *sk); 3091da177e4SLinus Torvalds void (*sk_data_ready)(struct sock *sk, int bytes); 3101da177e4SLinus Torvalds void (*sk_write_space)(struct sock *sk); 3111da177e4SLinus Torvalds void (*sk_error_report)(struct sock *sk); 3121da177e4SLinus Torvalds int (*sk_backlog_rcv)(struct sock *sk, 
3131da177e4SLinus Torvalds struct sk_buff *skb); 3141da177e4SLinus Torvalds void (*sk_destruct)(struct sock *sk); 3151da177e4SLinus Torvalds }; 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds /* 3181da177e4SLinus Torvalds * Hashed lists helper routines 3191da177e4SLinus Torvalds */ 320e48c414eSArnaldo Carvalho de Melo static inline struct sock *__sk_head(const struct hlist_head *head) 3211da177e4SLinus Torvalds { 3221da177e4SLinus Torvalds return hlist_entry(head->first, struct sock, sk_node); 3231da177e4SLinus Torvalds } 3241da177e4SLinus Torvalds 325e48c414eSArnaldo Carvalho de Melo static inline struct sock *sk_head(const struct hlist_head *head) 3261da177e4SLinus Torvalds { 3271da177e4SLinus Torvalds return hlist_empty(head) ? NULL : __sk_head(head); 3281da177e4SLinus Torvalds } 3291da177e4SLinus Torvalds 33088ab1932SEric Dumazet static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) 33188ab1932SEric Dumazet { 33288ab1932SEric Dumazet return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); 33388ab1932SEric Dumazet } 33488ab1932SEric Dumazet 33588ab1932SEric Dumazet static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) 33688ab1932SEric Dumazet { 33788ab1932SEric Dumazet return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); 33888ab1932SEric Dumazet } 33988ab1932SEric Dumazet 340e48c414eSArnaldo Carvalho de Melo static inline struct sock *sk_next(const struct sock *sk) 3411da177e4SLinus Torvalds { 3421da177e4SLinus Torvalds return sk->sk_node.next ? 3431da177e4SLinus Torvalds hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; 3441da177e4SLinus Torvalds } 3451da177e4SLinus Torvalds 34688ab1932SEric Dumazet static inline struct sock *sk_nulls_next(const struct sock *sk) 34788ab1932SEric Dumazet { 34888ab1932SEric Dumazet return (!is_a_nulls(sk->sk_nulls_node.next)) ? 
34988ab1932SEric Dumazet hlist_nulls_entry(sk->sk_nulls_node.next, 35088ab1932SEric Dumazet struct sock, sk_nulls_node) : 35188ab1932SEric Dumazet NULL; 35288ab1932SEric Dumazet } 35388ab1932SEric Dumazet 354e48c414eSArnaldo Carvalho de Melo static inline int sk_unhashed(const struct sock *sk) 3551da177e4SLinus Torvalds { 3561da177e4SLinus Torvalds return hlist_unhashed(&sk->sk_node); 3571da177e4SLinus Torvalds } 3581da177e4SLinus Torvalds 359e48c414eSArnaldo Carvalho de Melo static inline int sk_hashed(const struct sock *sk) 3601da177e4SLinus Torvalds { 361da753beaSAkinobu Mita return !sk_unhashed(sk); 3621da177e4SLinus Torvalds } 3631da177e4SLinus Torvalds 3641da177e4SLinus Torvalds static __inline__ void sk_node_init(struct hlist_node *node) 3651da177e4SLinus Torvalds { 3661da177e4SLinus Torvalds node->pprev = NULL; 3671da177e4SLinus Torvalds } 3681da177e4SLinus Torvalds 36988ab1932SEric Dumazet static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) 37088ab1932SEric Dumazet { 37188ab1932SEric Dumazet node->pprev = NULL; 37288ab1932SEric Dumazet } 37388ab1932SEric Dumazet 3741da177e4SLinus Torvalds static __inline__ void __sk_del_node(struct sock *sk) 3751da177e4SLinus Torvalds { 3761da177e4SLinus Torvalds __hlist_del(&sk->sk_node); 3771da177e4SLinus Torvalds } 3781da177e4SLinus Torvalds 3791da177e4SLinus Torvalds static __inline__ int __sk_del_node_init(struct sock *sk) 3801da177e4SLinus Torvalds { 3811da177e4SLinus Torvalds if (sk_hashed(sk)) { 3821da177e4SLinus Torvalds __sk_del_node(sk); 3831da177e4SLinus Torvalds sk_node_init(&sk->sk_node); 3841da177e4SLinus Torvalds return 1; 3851da177e4SLinus Torvalds } 3861da177e4SLinus Torvalds return 0; 3871da177e4SLinus Torvalds } 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds /* Grab socket reference count. This operation is valid only 3901da177e4SLinus Torvalds when sk is ALREADY grabbed f.e. 
it is found in hash table 3911da177e4SLinus Torvalds or a list and the lookup is made under lock preventing hash table 3921da177e4SLinus Torvalds modifications. 3931da177e4SLinus Torvalds */ 3941da177e4SLinus Torvalds 3951da177e4SLinus Torvalds static inline void sock_hold(struct sock *sk) 3961da177e4SLinus Torvalds { 3971da177e4SLinus Torvalds atomic_inc(&sk->sk_refcnt); 3981da177e4SLinus Torvalds } 3991da177e4SLinus Torvalds 4001da177e4SLinus Torvalds /* Ungrab socket in the context, which assumes that socket refcnt 4011da177e4SLinus Torvalds cannot hit zero, f.e. it is true in context of any socketcall. 4021da177e4SLinus Torvalds */ 4031da177e4SLinus Torvalds static inline void __sock_put(struct sock *sk) 4041da177e4SLinus Torvalds { 4051da177e4SLinus Torvalds atomic_dec(&sk->sk_refcnt); 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds static __inline__ int sk_del_node_init(struct sock *sk) 4091da177e4SLinus Torvalds { 4101da177e4SLinus Torvalds int rc = __sk_del_node_init(sk); 4111da177e4SLinus Torvalds 4121da177e4SLinus Torvalds if (rc) { 4131da177e4SLinus Torvalds /* paranoid for a while -acme */ 4141da177e4SLinus Torvalds WARN_ON(atomic_read(&sk->sk_refcnt) == 1); 4151da177e4SLinus Torvalds __sock_put(sk); 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds return rc; 4181da177e4SLinus Torvalds } 4191da177e4SLinus Torvalds 42088ab1932SEric Dumazet static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) 421271b72c7SEric Dumazet { 422271b72c7SEric Dumazet if (sk_hashed(sk)) { 42388ab1932SEric Dumazet hlist_nulls_del_init_rcu(&sk->sk_nulls_node); 424271b72c7SEric Dumazet return 1; 425271b72c7SEric Dumazet } 426271b72c7SEric Dumazet return 0; 427271b72c7SEric Dumazet } 428271b72c7SEric Dumazet 42988ab1932SEric Dumazet static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) 430271b72c7SEric Dumazet { 43188ab1932SEric Dumazet int rc = __sk_nulls_del_node_init_rcu(sk); 432271b72c7SEric Dumazet 
433271b72c7SEric Dumazet if (rc) { 434271b72c7SEric Dumazet /* paranoid for a while -acme */ 435271b72c7SEric Dumazet WARN_ON(atomic_read(&sk->sk_refcnt) == 1); 436271b72c7SEric Dumazet __sock_put(sk); 437271b72c7SEric Dumazet } 438271b72c7SEric Dumazet return rc; 439271b72c7SEric Dumazet } 440271b72c7SEric Dumazet 4411da177e4SLinus Torvalds static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) 4421da177e4SLinus Torvalds { 4431da177e4SLinus Torvalds hlist_add_head(&sk->sk_node, list); 4441da177e4SLinus Torvalds } 4451da177e4SLinus Torvalds 4461da177e4SLinus Torvalds static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) 4471da177e4SLinus Torvalds { 4481da177e4SLinus Torvalds sock_hold(sk); 4491da177e4SLinus Torvalds __sk_add_node(sk, list); 4501da177e4SLinus Torvalds } 4511da177e4SLinus Torvalds 45288ab1932SEric Dumazet static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) 453271b72c7SEric Dumazet { 45488ab1932SEric Dumazet hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); 455271b72c7SEric Dumazet } 456271b72c7SEric Dumazet 45788ab1932SEric Dumazet static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) 458271b72c7SEric Dumazet { 459271b72c7SEric Dumazet sock_hold(sk); 46088ab1932SEric Dumazet __sk_nulls_add_node_rcu(sk, list); 461271b72c7SEric Dumazet } 462271b72c7SEric Dumazet 4631da177e4SLinus Torvalds static __inline__ void __sk_del_bind_node(struct sock *sk) 4641da177e4SLinus Torvalds { 4651da177e4SLinus Torvalds __hlist_del(&sk->sk_bind_node); 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 4681da177e4SLinus Torvalds static __inline__ void sk_add_bind_node(struct sock *sk, 4691da177e4SLinus Torvalds struct hlist_head *list) 4701da177e4SLinus Torvalds { 4711da177e4SLinus Torvalds hlist_add_head(&sk->sk_bind_node, list); 4721da177e4SLinus Torvalds } 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds #define sk_for_each(__sk, 
node, list) \ 4751da177e4SLinus Torvalds hlist_for_each_entry(__sk, node, list, sk_node) 47688ab1932SEric Dumazet #define sk_nulls_for_each(__sk, node, list) \ 47788ab1932SEric Dumazet hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) 47888ab1932SEric Dumazet #define sk_nulls_for_each_rcu(__sk, node, list) \ 47988ab1932SEric Dumazet hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) 4801da177e4SLinus Torvalds #define sk_for_each_from(__sk, node) \ 4811da177e4SLinus Torvalds if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ 4821da177e4SLinus Torvalds hlist_for_each_entry_from(__sk, node, sk_node) 48388ab1932SEric Dumazet #define sk_nulls_for_each_from(__sk, node) \ 48488ab1932SEric Dumazet if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ 48588ab1932SEric Dumazet hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) 4861da177e4SLinus Torvalds #define sk_for_each_continue(__sk, node) \ 4871da177e4SLinus Torvalds if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ 4881da177e4SLinus Torvalds hlist_for_each_entry_continue(__sk, node, sk_node) 4891da177e4SLinus Torvalds #define sk_for_each_safe(__sk, node, tmp, list) \ 4901da177e4SLinus Torvalds hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) 4911da177e4SLinus Torvalds #define sk_for_each_bound(__sk, node, list) \ 4921da177e4SLinus Torvalds hlist_for_each_entry(__sk, node, list, sk_bind_node) 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds /* Sock flags */ 4951da177e4SLinus Torvalds enum sock_flags { 4961da177e4SLinus Torvalds SOCK_DEAD, 4971da177e4SLinus Torvalds SOCK_DONE, 4981da177e4SLinus Torvalds SOCK_URGINLINE, 4991da177e4SLinus Torvalds SOCK_KEEPOPEN, 5001da177e4SLinus Torvalds SOCK_LINGER, 5011da177e4SLinus Torvalds SOCK_DESTROY, 5021da177e4SLinus Torvalds SOCK_BROADCAST, 5031da177e4SLinus Torvalds SOCK_TIMESTAMP, 5041da177e4SLinus Torvalds SOCK_ZAPPED, 5051da177e4SLinus Torvalds SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */ 
5061da177e4SLinus Torvalds SOCK_DBG, /* %SO_DEBUG setting */ 5071da177e4SLinus Torvalds SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ 50892f37fd2SEric Dumazet SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ 5091da177e4SLinus Torvalds SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ 5101da177e4SLinus Torvalds SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ 51120d49473SPatrick Ohly SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */ 51220d49473SPatrick Ohly SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */ 51320d49473SPatrick Ohly SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */ 51420d49473SPatrick Ohly SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ 51520d49473SPatrick Ohly SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ 51620d49473SPatrick Ohly SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ 51720d49473SPatrick Ohly SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ 518bcdce719SEric Dumazet SOCK_FASYNC, /* fasync() active */ 5193b885787SNeil Horman SOCK_RXQ_OVFL, 5201da177e4SLinus Torvalds }; 5211da177e4SLinus Torvalds 52253b924b3SRalf Baechle static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) 52353b924b3SRalf Baechle { 52453b924b3SRalf Baechle nsk->sk_flags = osk->sk_flags; 52553b924b3SRalf Baechle } 52653b924b3SRalf Baechle 5271da177e4SLinus Torvalds static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) 5281da177e4SLinus Torvalds { 5291da177e4SLinus Torvalds __set_bit(flag, &sk->sk_flags); 5301da177e4SLinus Torvalds } 5311da177e4SLinus Torvalds 5321da177e4SLinus Torvalds static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag) 5331da177e4SLinus Torvalds { 5341da177e4SLinus Torvalds __clear_bit(flag, &sk->sk_flags); 5351da177e4SLinus Torvalds } 5361da177e4SLinus Torvalds 5371da177e4SLinus Torvalds static inline int sock_flag(struct sock *sk, enum sock_flags flag) 
5381da177e4SLinus Torvalds { 5391da177e4SLinus Torvalds return test_bit(flag, &sk->sk_flags); 5401da177e4SLinus Torvalds } 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds static inline void sk_acceptq_removed(struct sock *sk) 5431da177e4SLinus Torvalds { 5441da177e4SLinus Torvalds sk->sk_ack_backlog--; 5451da177e4SLinus Torvalds } 5461da177e4SLinus Torvalds 5471da177e4SLinus Torvalds static inline void sk_acceptq_added(struct sock *sk) 5481da177e4SLinus Torvalds { 5491da177e4SLinus Torvalds sk->sk_ack_backlog++; 5501da177e4SLinus Torvalds } 5511da177e4SLinus Torvalds 5521da177e4SLinus Torvalds static inline int sk_acceptq_is_full(struct sock *sk) 5531da177e4SLinus Torvalds { 55464a14651SDavid S. Miller return sk->sk_ack_backlog > sk->sk_max_ack_backlog; 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds /* 5581da177e4SLinus Torvalds * Compute minimal free write space needed to queue new packets. 5591da177e4SLinus Torvalds */ 5601da177e4SLinus Torvalds static inline int sk_stream_min_wspace(struct sock *sk) 5611da177e4SLinus Torvalds { 5628df09ea3SEric Dumazet return sk->sk_wmem_queued >> 1; 5631da177e4SLinus Torvalds } 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds static inline int sk_stream_wspace(struct sock *sk) 5661da177e4SLinus Torvalds { 5671da177e4SLinus Torvalds return sk->sk_sndbuf - sk->sk_wmem_queued; 5681da177e4SLinus Torvalds } 5691da177e4SLinus Torvalds 5701da177e4SLinus Torvalds extern void sk_stream_write_space(struct sock *sk); 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds static inline int sk_stream_memory_free(struct sock *sk) 5731da177e4SLinus Torvalds { 5741da177e4SLinus Torvalds return sk->sk_wmem_queued < sk->sk_sndbuf; 5751da177e4SLinus Torvalds } 5761da177e4SLinus Torvalds 5771da177e4SLinus Torvalds /* The per-socket spinlock must be held here. 
*/ 5789ee6b535SStephen Hemminger static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) 5799ee6b535SStephen Hemminger { 5809ee6b535SStephen Hemminger if (!sk->sk_backlog.tail) { 5819ee6b535SStephen Hemminger sk->sk_backlog.head = sk->sk_backlog.tail = skb; 5829ee6b535SStephen Hemminger } else { 5839ee6b535SStephen Hemminger sk->sk_backlog.tail->next = skb; 5849ee6b535SStephen Hemminger sk->sk_backlog.tail = skb; 5859ee6b535SStephen Hemminger } 5869ee6b535SStephen Hemminger skb->next = NULL; 5879ee6b535SStephen Hemminger } 5881da177e4SLinus Torvalds 589c57943a1SPeter Zijlstra static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) 590c57943a1SPeter Zijlstra { 591c57943a1SPeter Zijlstra return sk->sk_backlog_rcv(sk, skb); 592c57943a1SPeter Zijlstra } 593c57943a1SPeter Zijlstra 5941da177e4SLinus Torvalds #define sk_wait_event(__sk, __timeo, __condition) \ 595cfcabdccSStephen Hemminger ({ int __rc; \ 5961da177e4SLinus Torvalds release_sock(__sk); \ 597cfcabdccSStephen Hemminger __rc = __condition; \ 598cfcabdccSStephen Hemminger if (!__rc) { \ 5991da177e4SLinus Torvalds *(__timeo) = schedule_timeout(*(__timeo)); \ 6001da177e4SLinus Torvalds } \ 6011da177e4SLinus Torvalds lock_sock(__sk); \ 602cfcabdccSStephen Hemminger __rc = __condition; \ 603cfcabdccSStephen Hemminger __rc; \ 6041da177e4SLinus Torvalds }) 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); 6071da177e4SLinus Torvalds extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); 6081da177e4SLinus Torvalds extern void sk_stream_wait_close(struct sock *sk, long timeo_p); 6091da177e4SLinus Torvalds extern int sk_stream_error(struct sock *sk, int flags, int err); 6101da177e4SLinus Torvalds extern void sk_stream_kill_queues(struct sock *sk); 6111da177e4SLinus Torvalds 6121da177e4SLinus Torvalds extern int sk_wait_data(struct sock *sk, long *timeo); 6131da177e4SLinus Torvalds 61460236fddSArnaldo Carvalho 
de Melo struct request_sock_ops; 6156d6ee43eSArnaldo Carvalho de Melo struct timewait_sock_ops; 616ab1e0a13SArnaldo Carvalho de Melo struct inet_hashinfo; 617fc8717baSPavel Emelyanov struct raw_hashinfo; 6182e6599cbSArnaldo Carvalho de Melo 6191da177e4SLinus Torvalds /* Networking protocol blocks we attach to sockets. 6201da177e4SLinus Torvalds * socket layer -> transport layer interface 6211da177e4SLinus Torvalds * transport -> network interface is defined by struct inet_proto 6221da177e4SLinus Torvalds */ 6231da177e4SLinus Torvalds struct proto { 6241da177e4SLinus Torvalds void (*close)(struct sock *sk, 6251da177e4SLinus Torvalds long timeout); 6261da177e4SLinus Torvalds int (*connect)(struct sock *sk, 6271da177e4SLinus Torvalds struct sockaddr *uaddr, 6281da177e4SLinus Torvalds int addr_len); 6291da177e4SLinus Torvalds int (*disconnect)(struct sock *sk, int flags); 6301da177e4SLinus Torvalds 6311da177e4SLinus Torvalds struct sock * (*accept) (struct sock *sk, int flags, int *err); 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds int (*ioctl)(struct sock *sk, int cmd, 6341da177e4SLinus Torvalds unsigned long arg); 6351da177e4SLinus Torvalds int (*init)(struct sock *sk); 6367d06b2e0SBrian Haley void (*destroy)(struct sock *sk); 6371da177e4SLinus Torvalds void (*shutdown)(struct sock *sk, int how); 6381da177e4SLinus Torvalds int (*setsockopt)(struct sock *sk, int level, 6391da177e4SLinus Torvalds int optname, char __user *optval, 640b7058842SDavid S. Miller unsigned int optlen); 6411da177e4SLinus Torvalds int (*getsockopt)(struct sock *sk, int level, 6421da177e4SLinus Torvalds int optname, char __user *optval, 6431da177e4SLinus Torvalds int __user *option); 644af01d537SAlexey Dobriyan #ifdef CONFIG_COMPAT 6453fdadf7dSDmitry Mishin int (*compat_setsockopt)(struct sock *sk, 6463fdadf7dSDmitry Mishin int level, 6473fdadf7dSDmitry Mishin int optname, char __user *optval, 648b7058842SDavid S. 
Miller unsigned int optlen); 6493fdadf7dSDmitry Mishin int (*compat_getsockopt)(struct sock *sk, 6503fdadf7dSDmitry Mishin int level, 6513fdadf7dSDmitry Mishin int optname, char __user *optval, 6523fdadf7dSDmitry Mishin int __user *option); 653af01d537SAlexey Dobriyan #endif 6541da177e4SLinus Torvalds int (*sendmsg)(struct kiocb *iocb, struct sock *sk, 6551da177e4SLinus Torvalds struct msghdr *msg, size_t len); 6561da177e4SLinus Torvalds int (*recvmsg)(struct kiocb *iocb, struct sock *sk, 6571da177e4SLinus Torvalds struct msghdr *msg, 6581da177e4SLinus Torvalds size_t len, int noblock, int flags, 6591da177e4SLinus Torvalds int *addr_len); 6601da177e4SLinus Torvalds int (*sendpage)(struct sock *sk, struct page *page, 6611da177e4SLinus Torvalds int offset, size_t size, int flags); 6621da177e4SLinus Torvalds int (*bind)(struct sock *sk, 6631da177e4SLinus Torvalds struct sockaddr *uaddr, int addr_len); 6641da177e4SLinus Torvalds 6651da177e4SLinus Torvalds int (*backlog_rcv) (struct sock *sk, 6661da177e4SLinus Torvalds struct sk_buff *skb); 6671da177e4SLinus Torvalds 6681da177e4SLinus Torvalds /* Keeping track of sk's, looking them up, and port selection methods. */ 6691da177e4SLinus Torvalds void (*hash)(struct sock *sk); 6701da177e4SLinus Torvalds void (*unhash)(struct sock *sk); 6711da177e4SLinus Torvalds int (*get_port)(struct sock *sk, unsigned short snum); 6721da177e4SLinus Torvalds 673286ab3d4SEric Dumazet /* Keeping track of sockets in use */ 67465f76517SEric Dumazet #ifdef CONFIG_PROC_FS 67513ff3d6fSPavel Emelyanov unsigned int inuse_idx; 67665f76517SEric Dumazet #endif 677ebb53d75SArnaldo Carvalho de Melo 6781da177e4SLinus Torvalds /* Memory pressure */ 6795c52ba17SPavel Emelyanov void (*enter_memory_pressure)(struct sock *sk); 6801da177e4SLinus Torvalds atomic_t *memory_allocated; /* Current allocated memory. */ 6811748376bSEric Dumazet struct percpu_counter *sockets_allocated; /* Current number of sockets. 
*/ 6821da177e4SLinus Torvalds /* 6831da177e4SLinus Torvalds * Pressure flag: try to collapse. 6841da177e4SLinus Torvalds * Technical note: it is used by multiple contexts non atomically. 6853ab224beSHideo Aoki * All the __sk_mem_schedule() is of this nature: accounting 6861da177e4SLinus Torvalds * is strict, actions are advisory and have some latency. 6871da177e4SLinus Torvalds */ 6881da177e4SLinus Torvalds int *memory_pressure; 6891da177e4SLinus Torvalds int *sysctl_mem; 6901da177e4SLinus Torvalds int *sysctl_wmem; 6911da177e4SLinus Torvalds int *sysctl_rmem; 6921da177e4SLinus Torvalds int max_header; 6931da177e4SLinus Torvalds 694e18b890bSChristoph Lameter struct kmem_cache *slab; 6951da177e4SLinus Torvalds unsigned int obj_size; 696271b72c7SEric Dumazet int slab_flags; 6971da177e4SLinus Torvalds 698dd24c001SEric Dumazet struct percpu_counter *orphan_count; 6998feaf0c0SArnaldo Carvalho de Melo 70060236fddSArnaldo Carvalho de Melo struct request_sock_ops *rsk_prot; 7016d6ee43eSArnaldo Carvalho de Melo struct timewait_sock_ops *twsk_prot; 7022e6599cbSArnaldo Carvalho de Melo 70339d8cda7SPavel Emelyanov union { 704ab1e0a13SArnaldo Carvalho de Melo struct inet_hashinfo *hashinfo; 705645ca708SEric Dumazet struct udp_table *udp_table; 706fc8717baSPavel Emelyanov struct raw_hashinfo *raw_hash; 70739d8cda7SPavel Emelyanov } h; 708ab1e0a13SArnaldo Carvalho de Melo 7091da177e4SLinus Torvalds struct module *owner; 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds char name[32]; 7121da177e4SLinus Torvalds 7131da177e4SLinus Torvalds struct list_head node; 714e6848976SArnaldo Carvalho de Melo #ifdef SOCK_REFCNT_DEBUG 715e6848976SArnaldo Carvalho de Melo atomic_t socks; 716e6848976SArnaldo Carvalho de Melo #endif 7171da177e4SLinus Torvalds }; 7181da177e4SLinus Torvalds 7191da177e4SLinus Torvalds extern int proto_register(struct proto *prot, int alloc_slab); 7201da177e4SLinus Torvalds extern void proto_unregister(struct proto *prot); 7211da177e4SLinus Torvalds 
722e6848976SArnaldo Carvalho de Melo #ifdef SOCK_REFCNT_DEBUG 723e6848976SArnaldo Carvalho de Melo static inline void sk_refcnt_debug_inc(struct sock *sk) 724e6848976SArnaldo Carvalho de Melo { 725e6848976SArnaldo Carvalho de Melo atomic_inc(&sk->sk_prot->socks); 726e6848976SArnaldo Carvalho de Melo } 727e6848976SArnaldo Carvalho de Melo 728e6848976SArnaldo Carvalho de Melo static inline void sk_refcnt_debug_dec(struct sock *sk) 729e6848976SArnaldo Carvalho de Melo { 730e6848976SArnaldo Carvalho de Melo atomic_dec(&sk->sk_prot->socks); 731e6848976SArnaldo Carvalho de Melo printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", 732e6848976SArnaldo Carvalho de Melo sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); 733e6848976SArnaldo Carvalho de Melo } 734e6848976SArnaldo Carvalho de Melo 735e6848976SArnaldo Carvalho de Melo static inline void sk_refcnt_debug_release(const struct sock *sk) 736e6848976SArnaldo Carvalho de Melo { 737e6848976SArnaldo Carvalho de Melo if (atomic_read(&sk->sk_refcnt) != 1) 738e6848976SArnaldo Carvalho de Melo printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", 739e6848976SArnaldo Carvalho de Melo sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); 740e6848976SArnaldo Carvalho de Melo } 741e6848976SArnaldo Carvalho de Melo #else /* SOCK_REFCNT_DEBUG */ 742e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_inc(sk) do { } while (0) 743e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_dec(sk) do { } while (0) 744e6848976SArnaldo Carvalho de Melo #define sk_refcnt_debug_release(sk) do { } while (0) 745e6848976SArnaldo Carvalho de Melo #endif /* SOCK_REFCNT_DEBUG */ 746e6848976SArnaldo Carvalho de Melo 74765f76517SEric Dumazet 74865f76517SEric Dumazet #ifdef CONFIG_PROC_FS 7491da177e4SLinus Torvalds /* Called with local bh disabled */ 750c29a0bc4SPavel Emelyanov extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); 751c29a0bc4SPavel Emelyanov extern int 
sock_prot_inuse_get(struct net *net, struct proto *proto); 75265f76517SEric Dumazet #else 753c29a0bc4SPavel Emelyanov static void inline sock_prot_inuse_add(struct net *net, struct proto *prot, 754c29a0bc4SPavel Emelyanov int inc) 75565f76517SEric Dumazet { 75665f76517SEric Dumazet } 75765f76517SEric Dumazet #endif 75865f76517SEric Dumazet 7591da177e4SLinus Torvalds 760614c6cb4SArnaldo Carvalho de Melo /* With per-bucket locks this operation is not-atomic, so that 761614c6cb4SArnaldo Carvalho de Melo * this version is not worse. 762614c6cb4SArnaldo Carvalho de Melo */ 763614c6cb4SArnaldo Carvalho de Melo static inline void __sk_prot_rehash(struct sock *sk) 764614c6cb4SArnaldo Carvalho de Melo { 765614c6cb4SArnaldo Carvalho de Melo sk->sk_prot->unhash(sk); 766614c6cb4SArnaldo Carvalho de Melo sk->sk_prot->hash(sk); 767614c6cb4SArnaldo Carvalho de Melo } 768614c6cb4SArnaldo Carvalho de Melo 7691da177e4SLinus Torvalds /* About 10 seconds */ 7701da177e4SLinus Torvalds #define SOCK_DESTROY_TIME (10*HZ) 7711da177e4SLinus Torvalds 7721da177e4SLinus Torvalds /* Sockets 0-1023 can't be bound to unless you are superuser */ 7731da177e4SLinus Torvalds #define PROT_SOCK 1024 7741da177e4SLinus Torvalds 7751da177e4SLinus Torvalds #define SHUTDOWN_MASK 3 7761da177e4SLinus Torvalds #define RCV_SHUTDOWN 1 7771da177e4SLinus Torvalds #define SEND_SHUTDOWN 2 7781da177e4SLinus Torvalds 7791da177e4SLinus Torvalds #define SOCK_SNDBUF_LOCK 1 7801da177e4SLinus Torvalds #define SOCK_RCVBUF_LOCK 2 7811da177e4SLinus Torvalds #define SOCK_BINDADDR_LOCK 4 7821da177e4SLinus Torvalds #define SOCK_BINDPORT_LOCK 8 7831da177e4SLinus Torvalds 7841da177e4SLinus Torvalds /* sock_iocb: used to kick off async processing of socket ios */ 7851da177e4SLinus Torvalds struct sock_iocb { 7861da177e4SLinus Torvalds struct list_head list; 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds int flags; 7891da177e4SLinus Torvalds int size; 7901da177e4SLinus Torvalds struct socket *sock; 7911da177e4SLinus Torvalds 
struct sock *sk; 7921da177e4SLinus Torvalds struct scm_cookie *scm; 7931da177e4SLinus Torvalds struct msghdr *msg, async_msg; 7941da177e4SLinus Torvalds struct kiocb *kiocb; 7951da177e4SLinus Torvalds }; 7961da177e4SLinus Torvalds 7971da177e4SLinus Torvalds static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb) 7981da177e4SLinus Torvalds { 7991da177e4SLinus Torvalds return (struct sock_iocb *)iocb->private; 8001da177e4SLinus Torvalds } 8011da177e4SLinus Torvalds 8021da177e4SLinus Torvalds static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si) 8031da177e4SLinus Torvalds { 8041da177e4SLinus Torvalds return si->kiocb; 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds 8071da177e4SLinus Torvalds struct socket_alloc { 8081da177e4SLinus Torvalds struct socket socket; 8091da177e4SLinus Torvalds struct inode vfs_inode; 8101da177e4SLinus Torvalds }; 8111da177e4SLinus Torvalds 8121da177e4SLinus Torvalds static inline struct socket *SOCKET_I(struct inode *inode) 8131da177e4SLinus Torvalds { 8141da177e4SLinus Torvalds return &container_of(inode, struct socket_alloc, vfs_inode)->socket; 8151da177e4SLinus Torvalds } 8161da177e4SLinus Torvalds 8171da177e4SLinus Torvalds static inline struct inode *SOCK_INODE(struct socket *socket) 8181da177e4SLinus Torvalds { 8191da177e4SLinus Torvalds return &container_of(socket, struct socket_alloc, socket)->vfs_inode; 8201da177e4SLinus Torvalds } 8211da177e4SLinus Torvalds 8223ab224beSHideo Aoki /* 8233ab224beSHideo Aoki * Functions for memory accounting 8243ab224beSHideo Aoki */ 8253ab224beSHideo Aoki extern int __sk_mem_schedule(struct sock *sk, int size, int kind); 8263ab224beSHideo Aoki extern void __sk_mem_reclaim(struct sock *sk); 8271da177e4SLinus Torvalds 8283ab224beSHideo Aoki #define SK_MEM_QUANTUM ((int)PAGE_SIZE) 8293ab224beSHideo Aoki #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) 8303ab224beSHideo Aoki #define SK_MEM_SEND 0 8313ab224beSHideo Aoki #define SK_MEM_RECV 1 8321da177e4SLinus Torvalds 
8333ab224beSHideo Aoki static inline int sk_mem_pages(int amt) 8341da177e4SLinus Torvalds { 8353ab224beSHideo Aoki return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; 8361da177e4SLinus Torvalds } 8371da177e4SLinus Torvalds 8383ab224beSHideo Aoki static inline int sk_has_account(struct sock *sk) 8391da177e4SLinus Torvalds { 8403ab224beSHideo Aoki /* return true if protocol supports memory accounting */ 8413ab224beSHideo Aoki return !!sk->sk_prot->memory_allocated; 8421da177e4SLinus Torvalds } 8431da177e4SLinus Torvalds 8443ab224beSHideo Aoki static inline int sk_wmem_schedule(struct sock *sk, int size) 8451da177e4SLinus Torvalds { 8463ab224beSHideo Aoki if (!sk_has_account(sk)) 8473ab224beSHideo Aoki return 1; 848d80d99d6SHerbert Xu return size <= sk->sk_forward_alloc || 8493ab224beSHideo Aoki __sk_mem_schedule(sk, size, SK_MEM_SEND); 8503ab224beSHideo Aoki } 8513ab224beSHideo Aoki 8523ab224beSHideo Aoki static inline int sk_rmem_schedule(struct sock *sk, int size) 8533ab224beSHideo Aoki { 8543ab224beSHideo Aoki if (!sk_has_account(sk)) 8553ab224beSHideo Aoki return 1; 8563ab224beSHideo Aoki return size <= sk->sk_forward_alloc || 8573ab224beSHideo Aoki __sk_mem_schedule(sk, size, SK_MEM_RECV); 8583ab224beSHideo Aoki } 8593ab224beSHideo Aoki 8603ab224beSHideo Aoki static inline void sk_mem_reclaim(struct sock *sk) 8613ab224beSHideo Aoki { 8623ab224beSHideo Aoki if (!sk_has_account(sk)) 8633ab224beSHideo Aoki return; 8643ab224beSHideo Aoki if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) 8653ab224beSHideo Aoki __sk_mem_reclaim(sk); 8663ab224beSHideo Aoki } 8673ab224beSHideo Aoki 8689993e7d3SDavid S. Miller static inline void sk_mem_reclaim_partial(struct sock *sk) 8699993e7d3SDavid S. Miller { 8709993e7d3SDavid S. Miller if (!sk_has_account(sk)) 8719993e7d3SDavid S. Miller return; 8729993e7d3SDavid S. Miller if (sk->sk_forward_alloc > SK_MEM_QUANTUM) 8739993e7d3SDavid S. Miller __sk_mem_reclaim(sk); 8749993e7d3SDavid S. Miller } 8759993e7d3SDavid S. 
Miller 8763ab224beSHideo Aoki static inline void sk_mem_charge(struct sock *sk, int size) 8773ab224beSHideo Aoki { 8783ab224beSHideo Aoki if (!sk_has_account(sk)) 8793ab224beSHideo Aoki return; 8803ab224beSHideo Aoki sk->sk_forward_alloc -= size; 8813ab224beSHideo Aoki } 8823ab224beSHideo Aoki 8833ab224beSHideo Aoki static inline void sk_mem_uncharge(struct sock *sk, int size) 8843ab224beSHideo Aoki { 8853ab224beSHideo Aoki if (!sk_has_account(sk)) 8863ab224beSHideo Aoki return; 8873ab224beSHideo Aoki sk->sk_forward_alloc += size; 8883ab224beSHideo Aoki } 8893ab224beSHideo Aoki 8903ab224beSHideo Aoki static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) 8913ab224beSHideo Aoki { 8923ab224beSHideo Aoki sock_set_flag(sk, SOCK_QUEUE_SHRUNK); 8933ab224beSHideo Aoki sk->sk_wmem_queued -= skb->truesize; 8943ab224beSHideo Aoki sk_mem_uncharge(sk, skb->truesize); 8953ab224beSHideo Aoki __kfree_skb(skb); 896d80d99d6SHerbert Xu } 897d80d99d6SHerbert Xu 8981da177e4SLinus Torvalds /* Used by processes to "lock" a socket state, so that 8991da177e4SLinus Torvalds * interrupts and bottom half handlers won't change it 9001da177e4SLinus Torvalds * from under us. It essentially blocks any incoming 9011da177e4SLinus Torvalds * packets, so that we won't get any new data or any 9021da177e4SLinus Torvalds * packets that change the state of the socket. 9031da177e4SLinus Torvalds * 9041da177e4SLinus Torvalds * While locked, BH processing will add new packets to 9051da177e4SLinus Torvalds * the backlog queue. This queue is processed by the 9061da177e4SLinus Torvalds * owner of the socket lock right before it is released. 9071da177e4SLinus Torvalds * 9081da177e4SLinus Torvalds * Since ~2.3.5 it is also exclusive sleep lock serializing 9091da177e4SLinus Torvalds * accesses from user process context. 
9101da177e4SLinus Torvalds */ 911d2e9117cSJohn Heffner #define sock_owned_by_user(sk) ((sk)->sk_lock.owned) 9121da177e4SLinus Torvalds 913ed07536eSPeter Zijlstra /* 914ed07536eSPeter Zijlstra * Macro so as to not evaluate some arguments when 915ed07536eSPeter Zijlstra * lockdep is not enabled. 916ed07536eSPeter Zijlstra * 917ed07536eSPeter Zijlstra * Mark both the sk_lock and the sk_lock.slock as a 918ed07536eSPeter Zijlstra * per-address-family lock class. 919ed07536eSPeter Zijlstra */ 920ed07536eSPeter Zijlstra #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ 921ed07536eSPeter Zijlstra do { \ 922d2e9117cSJohn Heffner sk->sk_lock.owned = 0; \ 923ed07536eSPeter Zijlstra init_waitqueue_head(&sk->sk_lock.wq); \ 924ed07536eSPeter Zijlstra spin_lock_init(&(sk)->sk_lock.slock); \ 925ed07536eSPeter Zijlstra debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ 926ed07536eSPeter Zijlstra sizeof((sk)->sk_lock)); \ 927ed07536eSPeter Zijlstra lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ 928ed07536eSPeter Zijlstra (skey), (sname)); \ 929ed07536eSPeter Zijlstra lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ 930ed07536eSPeter Zijlstra } while (0) 931ed07536eSPeter Zijlstra 93241380930SHarvey Harrison extern void lock_sock_nested(struct sock *sk, int subclass); 933fcc70d5fSPeter Zijlstra 934fcc70d5fSPeter Zijlstra static inline void lock_sock(struct sock *sk) 935fcc70d5fSPeter Zijlstra { 936fcc70d5fSPeter Zijlstra lock_sock_nested(sk, 0); 937fcc70d5fSPeter Zijlstra } 938fcc70d5fSPeter Zijlstra 93941380930SHarvey Harrison extern void release_sock(struct sock *sk); 9401da177e4SLinus Torvalds 9411da177e4SLinus Torvalds /* BH context may only use the following locking interface. 
*/ 9421da177e4SLinus Torvalds #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) 943c6366184SIngo Molnar #define bh_lock_sock_nested(__sk) \ 944c6366184SIngo Molnar spin_lock_nested(&((__sk)->sk_lock.slock), \ 945c6366184SIngo Molnar SINGLE_DEPTH_NESTING) 9461da177e4SLinus Torvalds #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) 9471da177e4SLinus Torvalds 9481b8d7ae4SEric W. Biederman extern struct sock *sk_alloc(struct net *net, int family, 949dd0fc66fSAl Viro gfp_t priority, 9506257ff21SPavel Emelyanov struct proto *prot); 9511da177e4SLinus Torvalds extern void sk_free(struct sock *sk); 952edf02087SDenis V. Lunev extern void sk_release_kernel(struct sock *sk); 95387d11cebSArnaldo Carvalho de Melo extern struct sock *sk_clone(const struct sock *sk, 954dd0fc66fSAl Viro const gfp_t priority); 9551da177e4SLinus Torvalds 9561da177e4SLinus Torvalds extern struct sk_buff *sock_wmalloc(struct sock *sk, 9571da177e4SLinus Torvalds unsigned long size, int force, 958dd0fc66fSAl Viro gfp_t priority); 9591da177e4SLinus Torvalds extern struct sk_buff *sock_rmalloc(struct sock *sk, 9601da177e4SLinus Torvalds unsigned long size, int force, 961dd0fc66fSAl Viro gfp_t priority); 9621da177e4SLinus Torvalds extern void sock_wfree(struct sk_buff *skb); 9631da177e4SLinus Torvalds extern void sock_rfree(struct sk_buff *skb); 9641da177e4SLinus Torvalds 9651da177e4SLinus Torvalds extern int sock_setsockopt(struct socket *sock, int level, 9661da177e4SLinus Torvalds int op, char __user *optval, 967b7058842SDavid S. 
Miller unsigned int optlen); 9681da177e4SLinus Torvalds 9691da177e4SLinus Torvalds extern int sock_getsockopt(struct socket *sock, int level, 9701da177e4SLinus Torvalds int op, char __user *optval, 9711da177e4SLinus Torvalds int __user *optlen); 9721da177e4SLinus Torvalds extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, 9731da177e4SLinus Torvalds unsigned long size, 9741da177e4SLinus Torvalds int noblock, 9751da177e4SLinus Torvalds int *errcode); 9764cc7f68dSHerbert Xu extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 9774cc7f68dSHerbert Xu unsigned long header_len, 9784cc7f68dSHerbert Xu unsigned long data_len, 9794cc7f68dSHerbert Xu int noblock, 9804cc7f68dSHerbert Xu int *errcode); 98186a76cafSVictor Fusco extern void *sock_kmalloc(struct sock *sk, int size, 982dd0fc66fSAl Viro gfp_t priority); 9831da177e4SLinus Torvalds extern void sock_kfree_s(struct sock *sk, void *mem, int size); 9841da177e4SLinus Torvalds extern void sk_send_sigurg(struct sock *sk); 9851da177e4SLinus Torvalds 9861da177e4SLinus Torvalds /* 9871da177e4SLinus Torvalds * Functions to fill in entries in struct proto_ops when a protocol 9881da177e4SLinus Torvalds * does not implement a particular function. 
9891da177e4SLinus Torvalds */ 9901da177e4SLinus Torvalds extern int sock_no_bind(struct socket *, 9911da177e4SLinus Torvalds struct sockaddr *, int); 9921da177e4SLinus Torvalds extern int sock_no_connect(struct socket *, 9931da177e4SLinus Torvalds struct sockaddr *, int, int); 9941da177e4SLinus Torvalds extern int sock_no_socketpair(struct socket *, 9951da177e4SLinus Torvalds struct socket *); 9961da177e4SLinus Torvalds extern int sock_no_accept(struct socket *, 9971da177e4SLinus Torvalds struct socket *, int); 9981da177e4SLinus Torvalds extern int sock_no_getname(struct socket *, 9991da177e4SLinus Torvalds struct sockaddr *, int *, int); 10001da177e4SLinus Torvalds extern unsigned int sock_no_poll(struct file *, struct socket *, 10011da177e4SLinus Torvalds struct poll_table_struct *); 10021da177e4SLinus Torvalds extern int sock_no_ioctl(struct socket *, unsigned int, 10031da177e4SLinus Torvalds unsigned long); 10041da177e4SLinus Torvalds extern int sock_no_listen(struct socket *, int); 10051da177e4SLinus Torvalds extern int sock_no_shutdown(struct socket *, int); 10061da177e4SLinus Torvalds extern int sock_no_getsockopt(struct socket *, int , int, 10071da177e4SLinus Torvalds char __user *, int __user *); 10081da177e4SLinus Torvalds extern int sock_no_setsockopt(struct socket *, int, int, 1009b7058842SDavid S. 
Miller char __user *, unsigned int); 10101da177e4SLinus Torvalds extern int sock_no_sendmsg(struct kiocb *, struct socket *, 10111da177e4SLinus Torvalds struct msghdr *, size_t); 10121da177e4SLinus Torvalds extern int sock_no_recvmsg(struct kiocb *, struct socket *, 10131da177e4SLinus Torvalds struct msghdr *, size_t, int); 10141da177e4SLinus Torvalds extern int sock_no_mmap(struct file *file, 10151da177e4SLinus Torvalds struct socket *sock, 10161da177e4SLinus Torvalds struct vm_area_struct *vma); 10171da177e4SLinus Torvalds extern ssize_t sock_no_sendpage(struct socket *sock, 10181da177e4SLinus Torvalds struct page *page, 10191da177e4SLinus Torvalds int offset, size_t size, 10201da177e4SLinus Torvalds int flags); 10211da177e4SLinus Torvalds 10221da177e4SLinus Torvalds /* 10231da177e4SLinus Torvalds * Functions to fill in entries in struct proto_ops when a protocol 10241da177e4SLinus Torvalds * uses the inet style. 10251da177e4SLinus Torvalds */ 10261da177e4SLinus Torvalds extern int sock_common_getsockopt(struct socket *sock, int level, int optname, 10271da177e4SLinus Torvalds char __user *optval, int __user *optlen); 10281da177e4SLinus Torvalds extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, 10291da177e4SLinus Torvalds struct msghdr *msg, size_t size, int flags); 10301da177e4SLinus Torvalds extern int sock_common_setsockopt(struct socket *sock, int level, int optname, 1031b7058842SDavid S. Miller char __user *optval, unsigned int optlen); 10323fdadf7dSDmitry Mishin extern int compat_sock_common_getsockopt(struct socket *sock, int level, 10333fdadf7dSDmitry Mishin int optname, char __user *optval, int __user *optlen); 10343fdadf7dSDmitry Mishin extern int compat_sock_common_setsockopt(struct socket *sock, int level, 1035b7058842SDavid S. 
Miller int optname, char __user *optval, unsigned int optlen); 10361da177e4SLinus Torvalds 10371da177e4SLinus Torvalds extern void sk_common_release(struct sock *sk); 10381da177e4SLinus Torvalds 10391da177e4SLinus Torvalds /* 10401da177e4SLinus Torvalds * Default socket callbacks and setup code 10411da177e4SLinus Torvalds */ 10421da177e4SLinus Torvalds 10431da177e4SLinus Torvalds /* Initialise core socket variables */ 10441da177e4SLinus Torvalds extern void sock_init_data(struct socket *sock, struct sock *sk); 10451da177e4SLinus Torvalds 10461da177e4SLinus Torvalds /** 1047dc9b3346SPaul Bonser * sk_filter_release: Release a socket filter 1048dc9b3346SPaul Bonser * @fp: filter to remove 1049dc9b3346SPaul Bonser * 1050dc9b3346SPaul Bonser * Remove a filter from a socket and release its resources. 1051dc9b3346SPaul Bonser */ 1052dc9b3346SPaul Bonser 1053309dd5fcSPavel Emelyanov static inline void sk_filter_release(struct sk_filter *fp) 1054309dd5fcSPavel Emelyanov { 1055309dd5fcSPavel Emelyanov if (atomic_dec_and_test(&fp->refcnt)) 105647e958eaSPavel Emelyanov kfree(fp); 1057309dd5fcSPavel Emelyanov } 1058309dd5fcSPavel Emelyanov 1059309dd5fcSPavel Emelyanov static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) 10601da177e4SLinus Torvalds { 10611da177e4SLinus Torvalds unsigned int size = sk_filter_len(fp); 10621da177e4SLinus Torvalds 10631da177e4SLinus Torvalds atomic_sub(size, &sk->sk_omem_alloc); 1064309dd5fcSPavel Emelyanov sk_filter_release(fp); 10651da177e4SLinus Torvalds } 10661da177e4SLinus Torvalds 10671da177e4SLinus Torvalds static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) 10681da177e4SLinus Torvalds { 10691da177e4SLinus Torvalds atomic_inc(&fp->refcnt); 10701da177e4SLinus Torvalds atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc); 10711da177e4SLinus Torvalds } 10721da177e4SLinus Torvalds 10731da177e4SLinus Torvalds /* 10741da177e4SLinus Torvalds * Socket reference counting postulates. 
 *
 * * Each user of socket SHOULD hold a reference count.
 * * Each access point to socket (a hash table bucket, reference from a list,
 *   running timer, skb in flight) MUST hold a reference count.
 * * When reference count hits 0, it means it will never increase back.
 * * When reference count hits 0, it means that no references from
 *   outside exist to this socket and current process on current CPU
 *   is last user and may/should destroy this socket.
 * * sk_free is called from any context: process, BH, IRQ. When
 *   it is called, socket has no references from outside -> sk_free
 *   may release descendant resources allocated by the socket, but
 *   by the time it is called, socket is NOT referenced by any
 *   hash tables, lists etc.
 * * Packets, delivered from outside (from network or from another process)
 *   and enqueued on receive/error queues SHOULD NOT grab reference count,
 *   when they sit in queue. Otherwise, packets will leak to hole, when
 *   socket is looked up by one cpu and unhashing is made by another CPU.
 *   It is true for udp/raw, netlink (leak to receive and error queues), tcp
 *   (leak to backlog). Packet socket does all the processing inside
 *   BR_NETPROTO_LOCK, so it does not have this race condition. UNIX sockets
 *   use separate SMP lock, so that they are prone too.
10961da177e4SLinus Torvalds */ 10971da177e4SLinus Torvalds 10981da177e4SLinus Torvalds /* Ungrab socket and destroy it, if it was the last reference. */ 10991da177e4SLinus Torvalds static inline void sock_put(struct sock *sk) 11001da177e4SLinus Torvalds { 11011da177e4SLinus Torvalds if (atomic_dec_and_test(&sk->sk_refcnt)) 11021da177e4SLinus Torvalds sk_free(sk); 11031da177e4SLinus Torvalds } 11041da177e4SLinus Torvalds 110558a5a7b9SArnaldo Carvalho de Melo extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, 110658a5a7b9SArnaldo Carvalho de Melo const int nested); 110725995ff5SArnaldo Carvalho de Melo 1108e022f0b4SKrishna Kumar static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) 1109e022f0b4SKrishna Kumar { 1110e022f0b4SKrishna Kumar sk->sk_tx_queue_mapping = tx_queue; 1111e022f0b4SKrishna Kumar } 1112e022f0b4SKrishna Kumar 1113e022f0b4SKrishna Kumar static inline void sk_tx_queue_clear(struct sock *sk) 1114e022f0b4SKrishna Kumar { 1115e022f0b4SKrishna Kumar sk->sk_tx_queue_mapping = -1; 1116e022f0b4SKrishna Kumar } 1117e022f0b4SKrishna Kumar 1118e022f0b4SKrishna Kumar static inline int sk_tx_queue_get(const struct sock *sk) 1119e022f0b4SKrishna Kumar { 1120e022f0b4SKrishna Kumar return sk->sk_tx_queue_mapping; 1121e022f0b4SKrishna Kumar } 1122e022f0b4SKrishna Kumar 1123e022f0b4SKrishna Kumar static inline bool sk_tx_queue_recorded(const struct sock *sk) 1124e022f0b4SKrishna Kumar { 1125e022f0b4SKrishna Kumar return (sk && sk->sk_tx_queue_mapping >= 0); 1126e022f0b4SKrishna Kumar } 1127e022f0b4SKrishna Kumar 1128972692e0SDavid S. Miller static inline void sk_set_socket(struct sock *sk, struct socket *sock) 1129972692e0SDavid S. Miller { 1130e022f0b4SKrishna Kumar sk_tx_queue_clear(sk); 1131972692e0SDavid S. Miller sk->sk_socket = sock; 1132972692e0SDavid S. Miller } 1133972692e0SDavid S. Miller 11341da177e4SLinus Torvalds /* Detach socket from process context. 
11351da177e4SLinus Torvalds * Announce socket dead, detach it from wait queue and inode. 11361da177e4SLinus Torvalds * Note that parent inode held reference count on this struct sock, 11371da177e4SLinus Torvalds * we do not release it in this function, because protocol 11381da177e4SLinus Torvalds * probably wants some additional cleanups or even continuing 11391da177e4SLinus Torvalds * to work with this socket (TCP). 11401da177e4SLinus Torvalds */ 11411da177e4SLinus Torvalds static inline void sock_orphan(struct sock *sk) 11421da177e4SLinus Torvalds { 11431da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 11441da177e4SLinus Torvalds sock_set_flag(sk, SOCK_DEAD); 1145972692e0SDavid S. Miller sk_set_socket(sk, NULL); 11461da177e4SLinus Torvalds sk->sk_sleep = NULL; 11471da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 11481da177e4SLinus Torvalds } 11491da177e4SLinus Torvalds 11501da177e4SLinus Torvalds static inline void sock_graft(struct sock *sk, struct socket *parent) 11511da177e4SLinus Torvalds { 11521da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 11531da177e4SLinus Torvalds sk->sk_sleep = &parent->wait; 11541da177e4SLinus Torvalds parent->sk = sk; 1155972692e0SDavid S. 
Miller sk_set_socket(sk, parent); 11564237c75cSVenkat Yekkirala security_sock_graft(sk, parent); 11571da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 11581da177e4SLinus Torvalds } 11591da177e4SLinus Torvalds 11601da177e4SLinus Torvalds extern int sock_i_uid(struct sock *sk); 11611da177e4SLinus Torvalds extern unsigned long sock_i_ino(struct sock *sk); 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds static inline struct dst_entry * 11641da177e4SLinus Torvalds __sk_dst_get(struct sock *sk) 11651da177e4SLinus Torvalds { 11661da177e4SLinus Torvalds return sk->sk_dst_cache; 11671da177e4SLinus Torvalds } 11681da177e4SLinus Torvalds 11691da177e4SLinus Torvalds static inline struct dst_entry * 11701da177e4SLinus Torvalds sk_dst_get(struct sock *sk) 11711da177e4SLinus Torvalds { 11721da177e4SLinus Torvalds struct dst_entry *dst; 11731da177e4SLinus Torvalds 11741da177e4SLinus Torvalds read_lock(&sk->sk_dst_lock); 11751da177e4SLinus Torvalds dst = sk->sk_dst_cache; 11761da177e4SLinus Torvalds if (dst) 11771da177e4SLinus Torvalds dst_hold(dst); 11781da177e4SLinus Torvalds read_unlock(&sk->sk_dst_lock); 11791da177e4SLinus Torvalds return dst; 11801da177e4SLinus Torvalds } 11811da177e4SLinus Torvalds 11821da177e4SLinus Torvalds static inline void 11831da177e4SLinus Torvalds __sk_dst_set(struct sock *sk, struct dst_entry *dst) 11841da177e4SLinus Torvalds { 11851da177e4SLinus Torvalds struct dst_entry *old_dst; 11861da177e4SLinus Torvalds 1187e022f0b4SKrishna Kumar sk_tx_queue_clear(sk); 11881da177e4SLinus Torvalds old_dst = sk->sk_dst_cache; 11891da177e4SLinus Torvalds sk->sk_dst_cache = dst; 11901da177e4SLinus Torvalds dst_release(old_dst); 11911da177e4SLinus Torvalds } 11921da177e4SLinus Torvalds 11931da177e4SLinus Torvalds static inline void 11941da177e4SLinus Torvalds sk_dst_set(struct sock *sk, struct dst_entry *dst) 11951da177e4SLinus Torvalds { 11961da177e4SLinus Torvalds write_lock(&sk->sk_dst_lock); 11971da177e4SLinus Torvalds __sk_dst_set(sk, 
dst); 11981da177e4SLinus Torvalds write_unlock(&sk->sk_dst_lock); 11991da177e4SLinus Torvalds } 12001da177e4SLinus Torvalds 12011da177e4SLinus Torvalds static inline void 12021da177e4SLinus Torvalds __sk_dst_reset(struct sock *sk) 12031da177e4SLinus Torvalds { 12041da177e4SLinus Torvalds struct dst_entry *old_dst; 12051da177e4SLinus Torvalds 1206e022f0b4SKrishna Kumar sk_tx_queue_clear(sk); 12071da177e4SLinus Torvalds old_dst = sk->sk_dst_cache; 12081da177e4SLinus Torvalds sk->sk_dst_cache = NULL; 12091da177e4SLinus Torvalds dst_release(old_dst); 12101da177e4SLinus Torvalds } 12111da177e4SLinus Torvalds 12121da177e4SLinus Torvalds static inline void 12131da177e4SLinus Torvalds sk_dst_reset(struct sock *sk) 12141da177e4SLinus Torvalds { 12151da177e4SLinus Torvalds write_lock(&sk->sk_dst_lock); 12161da177e4SLinus Torvalds __sk_dst_reset(sk); 12171da177e4SLinus Torvalds write_unlock(&sk->sk_dst_lock); 12181da177e4SLinus Torvalds } 12191da177e4SLinus Torvalds 1220f0088a50SDenis Vlasenko extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); 12211da177e4SLinus Torvalds 1222f0088a50SDenis Vlasenko extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); 12231da177e4SLinus Torvalds 1224bcd76111SHerbert Xu static inline int sk_can_gso(const struct sock *sk) 1225bcd76111SHerbert Xu { 1226bcd76111SHerbert Xu return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); 1227bcd76111SHerbert Xu } 1228bcd76111SHerbert Xu 12299958089aSAndi Kleen extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); 12306cbb0df7SArnaldo Carvalho de Melo 12311da177e4SLinus Torvalds static inline int skb_copy_to_page(struct sock *sk, char __user *from, 12321da177e4SLinus Torvalds struct sk_buff *skb, struct page *page, 12331da177e4SLinus Torvalds int off, int copy) 12341da177e4SLinus Torvalds { 12351da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_NONE) { 12361da177e4SLinus Torvalds int err = 0; 12375084205fSAl Viro __wsum csum = csum_and_copy_from_user(from, 
12381da177e4SLinus Torvalds page_address(page) + off, 12391da177e4SLinus Torvalds copy, 0, &err); 12401da177e4SLinus Torvalds if (err) 12411da177e4SLinus Torvalds return err; 12421da177e4SLinus Torvalds skb->csum = csum_block_add(skb->csum, csum, skb->len); 12431da177e4SLinus Torvalds } else if (copy_from_user(page_address(page) + off, from, copy)) 12441da177e4SLinus Torvalds return -EFAULT; 12451da177e4SLinus Torvalds 12461da177e4SLinus Torvalds skb->len += copy; 12471da177e4SLinus Torvalds skb->data_len += copy; 12481da177e4SLinus Torvalds skb->truesize += copy; 12491da177e4SLinus Torvalds sk->sk_wmem_queued += copy; 12503ab224beSHideo Aoki sk_mem_charge(sk, copy); 12511da177e4SLinus Torvalds return 0; 12521da177e4SLinus Torvalds } 12531da177e4SLinus Torvalds 1254c564039fSEric Dumazet /** 1255c564039fSEric Dumazet * sk_wmem_alloc_get - returns write allocations 1256c564039fSEric Dumazet * @sk: socket 1257c564039fSEric Dumazet * 1258c564039fSEric Dumazet * Returns sk_wmem_alloc minus initial offset of one 1259c564039fSEric Dumazet */ 1260c564039fSEric Dumazet static inline int sk_wmem_alloc_get(const struct sock *sk) 1261c564039fSEric Dumazet { 1262c564039fSEric Dumazet return atomic_read(&sk->sk_wmem_alloc) - 1; 1263c564039fSEric Dumazet } 1264c564039fSEric Dumazet 1265c564039fSEric Dumazet /** 1266c564039fSEric Dumazet * sk_rmem_alloc_get - returns read allocations 1267c564039fSEric Dumazet * @sk: socket 1268c564039fSEric Dumazet * 1269c564039fSEric Dumazet * Returns sk_rmem_alloc 1270c564039fSEric Dumazet */ 1271c564039fSEric Dumazet static inline int sk_rmem_alloc_get(const struct sock *sk) 1272c564039fSEric Dumazet { 1273c564039fSEric Dumazet return atomic_read(&sk->sk_rmem_alloc); 1274c564039fSEric Dumazet } 1275c564039fSEric Dumazet 1276c564039fSEric Dumazet /** 1277c564039fSEric Dumazet * sk_has_allocations - check if allocations are outstanding 1278c564039fSEric Dumazet * @sk: socket 1279c564039fSEric Dumazet * 1280c564039fSEric Dumazet * Returns true if 
socket has write or read allocations 1281c564039fSEric Dumazet */ 1282c564039fSEric Dumazet static inline int sk_has_allocations(const struct sock *sk) 1283c564039fSEric Dumazet { 1284c564039fSEric Dumazet return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); 1285c564039fSEric Dumazet } 1286c564039fSEric Dumazet 1287a57de0b4SJiri Olsa /** 1288a57de0b4SJiri Olsa * sk_has_sleeper - check if there are any waiting processes 1289a57de0b4SJiri Olsa * @sk: socket 1290a57de0b4SJiri Olsa * 1291a57de0b4SJiri Olsa * Returns true if socket has waiting processes 1292a57de0b4SJiri Olsa * 1293a57de0b4SJiri Olsa * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory 1294a57de0b4SJiri Olsa * barrier call. They were added due to the race found within the tcp code. 1295a57de0b4SJiri Olsa * 1296a57de0b4SJiri Olsa * Consider following tcp code paths: 1297a57de0b4SJiri Olsa * 1298a57de0b4SJiri Olsa * CPU1 CPU2 1299a57de0b4SJiri Olsa * 1300a57de0b4SJiri Olsa * sys_select receive packet 1301a57de0b4SJiri Olsa * ... ... 1302a57de0b4SJiri Olsa * __add_wait_queue update tp->rcv_nxt 1303a57de0b4SJiri Olsa * ... ... 1304a57de0b4SJiri Olsa * tp->rcv_nxt check sock_def_readable 1305a57de0b4SJiri Olsa * ... { 1306a57de0b4SJiri Olsa * schedule ... 1307a57de0b4SJiri Olsa * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1308a57de0b4SJiri Olsa * wake_up_interruptible(sk->sk_sleep) 1309a57de0b4SJiri Olsa * ... 1310a57de0b4SJiri Olsa * } 1311a57de0b4SJiri Olsa * 1312a57de0b4SJiri Olsa * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay 1313a57de0b4SJiri Olsa * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 1314a57de0b4SJiri Olsa * could then endup calling schedule and sleep forever if there are no more 1315a57de0b4SJiri Olsa * data on the socket. 
1316ad462769SJiri Olsa * 1317ad462769SJiri Olsa * The sk_has_sleeper is always called right after a call to read_lock, so we 1318ad462769SJiri Olsa * can use smp_mb__after_lock barrier. 1319a57de0b4SJiri Olsa */ 1320a57de0b4SJiri Olsa static inline int sk_has_sleeper(struct sock *sk) 1321a57de0b4SJiri Olsa { 1322a57de0b4SJiri Olsa /* 1323a57de0b4SJiri Olsa * We need to be sure we are in sync with the 1324a57de0b4SJiri Olsa * add_wait_queue modifications to the wait queue. 1325a57de0b4SJiri Olsa * 1326a57de0b4SJiri Olsa * This memory barrier is paired in the sock_poll_wait. 1327a57de0b4SJiri Olsa */ 1328ad462769SJiri Olsa smp_mb__after_lock(); 1329a57de0b4SJiri Olsa return sk->sk_sleep && waitqueue_active(sk->sk_sleep); 1330a57de0b4SJiri Olsa } 1331a57de0b4SJiri Olsa 1332a57de0b4SJiri Olsa /** 1333a57de0b4SJiri Olsa * sock_poll_wait - place memory barrier behind the poll_wait call. 1334a57de0b4SJiri Olsa * @filp: file 1335a57de0b4SJiri Olsa * @wait_address: socket wait queue 1336a57de0b4SJiri Olsa * @p: poll_table 1337a57de0b4SJiri Olsa * 1338a57de0b4SJiri Olsa * See the comments in the sk_has_sleeper function. 1339a57de0b4SJiri Olsa */ 1340a57de0b4SJiri Olsa static inline void sock_poll_wait(struct file *filp, 1341a57de0b4SJiri Olsa wait_queue_head_t *wait_address, poll_table *p) 1342a57de0b4SJiri Olsa { 1343a57de0b4SJiri Olsa if (p && wait_address) { 1344a57de0b4SJiri Olsa poll_wait(filp, wait_address, p); 1345a57de0b4SJiri Olsa /* 1346a57de0b4SJiri Olsa * We need to be sure we are in sync with the 1347a57de0b4SJiri Olsa * socket flags modification. 1348a57de0b4SJiri Olsa * 1349a57de0b4SJiri Olsa * This memory barrier is paired in the sk_has_sleeper. 1350a57de0b4SJiri Olsa */ 1351a57de0b4SJiri Olsa smp_mb(); 1352a57de0b4SJiri Olsa } 1353a57de0b4SJiri Olsa } 1354a57de0b4SJiri Olsa 13551da177e4SLinus Torvalds /* 13561da177e4SLinus Torvalds * Queue a received datagram if it will fit. 
Stream and sequenced 13571da177e4SLinus Torvalds * protocols can't normally use this as they need to fit buffers in 13581da177e4SLinus Torvalds * and play with them. 13591da177e4SLinus Torvalds * 13601da177e4SLinus Torvalds * Inlined as it's very short and called for pretty much every 13611da177e4SLinus Torvalds * packet ever received. 13621da177e4SLinus Torvalds */ 13631da177e4SLinus Torvalds 13641da177e4SLinus Torvalds static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) 13651da177e4SLinus Torvalds { 1366d55d87fdSHerbert Xu skb_orphan(skb); 13671da177e4SLinus Torvalds skb->sk = sk; 13681da177e4SLinus Torvalds skb->destructor = sock_wfree; 13692b85a34eSEric Dumazet /* 13702b85a34eSEric Dumazet * We used to take a refcount on sk, but following operation 13712b85a34eSEric Dumazet * is enough to guarantee sk_free() wont free this sock until 13722b85a34eSEric Dumazet * all in-flight packets are completed 13732b85a34eSEric Dumazet */ 13741da177e4SLinus Torvalds atomic_add(skb->truesize, &sk->sk_wmem_alloc); 13751da177e4SLinus Torvalds } 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 13781da177e4SLinus Torvalds { 1379d55d87fdSHerbert Xu skb_orphan(skb); 13801da177e4SLinus Torvalds skb->sk = sk; 13811da177e4SLinus Torvalds skb->destructor = sock_rfree; 13821da177e4SLinus Torvalds atomic_add(skb->truesize, &sk->sk_rmem_alloc); 13833ab224beSHideo Aoki sk_mem_charge(sk, skb->truesize); 13841da177e4SLinus Torvalds } 13851da177e4SLinus Torvalds 13861da177e4SLinus Torvalds extern void sk_reset_timer(struct sock *sk, struct timer_list* timer, 13871da177e4SLinus Torvalds unsigned long expires); 13881da177e4SLinus Torvalds 13891da177e4SLinus Torvalds extern void sk_stop_timer(struct sock *sk, struct timer_list* timer); 13901da177e4SLinus Torvalds 1391f0088a50SDenis Vlasenko extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); 13921da177e4SLinus Torvalds 
13931da177e4SLinus Torvalds static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) 13941da177e4SLinus Torvalds { 13951da177e4SLinus Torvalds /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces 13961da177e4SLinus Torvalds number of warnings when compiling with -W --ANK 13971da177e4SLinus Torvalds */ 13981da177e4SLinus Torvalds if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 13991da177e4SLinus Torvalds (unsigned)sk->sk_rcvbuf) 14001da177e4SLinus Torvalds return -ENOMEM; 14011da177e4SLinus Torvalds skb_set_owner_r(skb, sk); 14021da177e4SLinus Torvalds skb_queue_tail(&sk->sk_error_queue, skb); 14031da177e4SLinus Torvalds if (!sock_flag(sk, SOCK_DEAD)) 14041da177e4SLinus Torvalds sk->sk_data_ready(sk, skb->len); 14051da177e4SLinus Torvalds return 0; 14061da177e4SLinus Torvalds } 14071da177e4SLinus Torvalds 14081da177e4SLinus Torvalds /* 14091da177e4SLinus Torvalds * Recover an error report and clear atomically 14101da177e4SLinus Torvalds */ 14111da177e4SLinus Torvalds 14121da177e4SLinus Torvalds static inline int sock_error(struct sock *sk) 14131da177e4SLinus Torvalds { 1414c1cbe4b7SBenjamin LaHaise int err; 1415c1cbe4b7SBenjamin LaHaise if (likely(!sk->sk_err)) 1416c1cbe4b7SBenjamin LaHaise return 0; 1417c1cbe4b7SBenjamin LaHaise err = xchg(&sk->sk_err, 0); 14181da177e4SLinus Torvalds return -err; 14191da177e4SLinus Torvalds } 14201da177e4SLinus Torvalds 14211da177e4SLinus Torvalds static inline unsigned long sock_wspace(struct sock *sk) 14221da177e4SLinus Torvalds { 14231da177e4SLinus Torvalds int amt = 0; 14241da177e4SLinus Torvalds 14251da177e4SLinus Torvalds if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 14261da177e4SLinus Torvalds amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); 14271da177e4SLinus Torvalds if (amt < 0) 14281da177e4SLinus Torvalds amt = 0; 14291da177e4SLinus Torvalds } 14301da177e4SLinus Torvalds return amt; 14311da177e4SLinus Torvalds } 14321da177e4SLinus Torvalds 14331da177e4SLinus Torvalds static inline 
void sk_wake_async(struct sock *sk, int how, int band) 14341da177e4SLinus Torvalds { 1435bcdce719SEric Dumazet if (sock_flag(sk, SOCK_FASYNC)) 14361da177e4SLinus Torvalds sock_wake_async(sk->sk_socket, how, band); 14371da177e4SLinus Torvalds } 14381da177e4SLinus Torvalds 14391da177e4SLinus Torvalds #define SOCK_MIN_SNDBUF 2048 14401da177e4SLinus Torvalds #define SOCK_MIN_RCVBUF 256 14411da177e4SLinus Torvalds 14421da177e4SLinus Torvalds static inline void sk_stream_moderate_sndbuf(struct sock *sk) 14431da177e4SLinus Torvalds { 14441da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { 14458df09ea3SEric Dumazet sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); 14461da177e4SLinus Torvalds sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF); 14471da177e4SLinus Torvalds } 14481da177e4SLinus Torvalds } 14491da177e4SLinus Torvalds 1450df97c708SPavel Emelyanov struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds static inline struct page *sk_stream_alloc_page(struct sock *sk) 14531da177e4SLinus Torvalds { 14541da177e4SLinus Torvalds struct page *page = NULL; 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds page = alloc_pages(sk->sk_allocation, 0); 1457ef015786SHerbert Xu if (!page) { 14585c52ba17SPavel Emelyanov sk->sk_prot->enter_memory_pressure(sk); 14591da177e4SLinus Torvalds sk_stream_moderate_sndbuf(sk); 14601da177e4SLinus Torvalds } 14611da177e4SLinus Torvalds return page; 14621da177e4SLinus Torvalds } 14631da177e4SLinus Torvalds 14641da177e4SLinus Torvalds /* 14651da177e4SLinus Torvalds * Default write policy as shown to user space via poll/select/SIGIO 14661da177e4SLinus Torvalds */ 14671da177e4SLinus Torvalds static inline int sock_writeable(const struct sock *sk) 14681da177e4SLinus Torvalds { 14698df09ea3SEric Dumazet return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); 14701da177e4SLinus Torvalds } 14711da177e4SLinus Torvalds 1472dd0fc66fSAl 
Viro static inline gfp_t gfp_any(void) 14731da177e4SLinus Torvalds { 147499709372SAndrew Morton return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; 14751da177e4SLinus Torvalds } 14761da177e4SLinus Torvalds 14771da177e4SLinus Torvalds static inline long sock_rcvtimeo(const struct sock *sk, int noblock) 14781da177e4SLinus Torvalds { 14791da177e4SLinus Torvalds return noblock ? 0 : sk->sk_rcvtimeo; 14801da177e4SLinus Torvalds } 14811da177e4SLinus Torvalds 14821da177e4SLinus Torvalds static inline long sock_sndtimeo(const struct sock *sk, int noblock) 14831da177e4SLinus Torvalds { 14841da177e4SLinus Torvalds return noblock ? 0 : sk->sk_sndtimeo; 14851da177e4SLinus Torvalds } 14861da177e4SLinus Torvalds 14871da177e4SLinus Torvalds static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) 14881da177e4SLinus Torvalds { 14891da177e4SLinus Torvalds return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1; 14901da177e4SLinus Torvalds } 14911da177e4SLinus Torvalds 14921da177e4SLinus Torvalds /* Alas, with timeout socket operations are not restartable. 14931da177e4SLinus Torvalds * Compare this to poll(). 14941da177e4SLinus Torvalds */ 14951da177e4SLinus Torvalds static inline int sock_intr_errno(long timeo) 14961da177e4SLinus Torvalds { 14971da177e4SLinus Torvalds return timeo == MAX_SCHEDULE_TIMEOUT ? 
-ERESTARTSYS : -EINTR; 14981da177e4SLinus Torvalds } 14991da177e4SLinus Torvalds 150092f37fd2SEric Dumazet extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 150192f37fd2SEric Dumazet struct sk_buff *skb); 150292f37fd2SEric Dumazet 15031da177e4SLinus Torvalds static __inline__ void 15041da177e4SLinus Torvalds sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 15051da177e4SLinus Torvalds { 1506b7aa0bf7SEric Dumazet ktime_t kt = skb->tstamp; 150720d49473SPatrick Ohly struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); 1508a61bbcf2SPatrick McHardy 150920d49473SPatrick Ohly /* 151020d49473SPatrick Ohly * generate control messages if 151120d49473SPatrick Ohly * - receive time stamping in software requested (SOCK_RCVTSTAMP 151220d49473SPatrick Ohly * or SOCK_TIMESTAMPING_RX_SOFTWARE) 151320d49473SPatrick Ohly * - software time stamp available and wanted 151420d49473SPatrick Ohly * (SOCK_TIMESTAMPING_SOFTWARE) 151520d49473SPatrick Ohly * - hardware time stamps available and wanted 151620d49473SPatrick Ohly * (SOCK_TIMESTAMPING_SYS_HARDWARE or 151720d49473SPatrick Ohly * SOCK_TIMESTAMPING_RAW_HARDWARE) 151820d49473SPatrick Ohly */ 151920d49473SPatrick Ohly if (sock_flag(sk, SOCK_RCVTSTAMP) || 152020d49473SPatrick Ohly sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || 152120d49473SPatrick Ohly (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || 152220d49473SPatrick Ohly (hwtstamps->hwtstamp.tv64 && 152320d49473SPatrick Ohly sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) || 152420d49473SPatrick Ohly (hwtstamps->syststamp.tv64 && 152520d49473SPatrick Ohly sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))) 152692f37fd2SEric Dumazet __sock_recv_timestamp(msg, sk, skb); 152792f37fd2SEric Dumazet else 1528b7aa0bf7SEric Dumazet sk->sk_stamp = kt; 15291da177e4SLinus Torvalds } 15301da177e4SLinus Torvalds 15313b885787SNeil Horman extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); 
15323b885787SNeil Horman 15331da177e4SLinus Torvalds /** 153420d49473SPatrick Ohly * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped 153520d49473SPatrick Ohly * @msg: outgoing packet 153620d49473SPatrick Ohly * @sk: socket sending this packet 153720d49473SPatrick Ohly * @shtx: filled with instructions for time stamping 153820d49473SPatrick Ohly * 153920d49473SPatrick Ohly * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if 154020d49473SPatrick Ohly * parameters are invalid. 154120d49473SPatrick Ohly */ 154220d49473SPatrick Ohly extern int sock_tx_timestamp(struct msghdr *msg, 154320d49473SPatrick Ohly struct sock *sk, 154420d49473SPatrick Ohly union skb_shared_tx *shtx); 154520d49473SPatrick Ohly 154620d49473SPatrick Ohly 154720d49473SPatrick Ohly /** 15481da177e4SLinus Torvalds * sk_eat_skb - Release a skb if it is no longer needed 15494dc3b16bSPavel Pisa * @sk: socket to eat this skb from 15504dc3b16bSPavel Pisa * @skb: socket buffer to eat 1551f4b8ea78SRandy Dunlap * @copied_early: flag indicating whether DMA operations copied this data early 15521da177e4SLinus Torvalds * 15531da177e4SLinus Torvalds * This routine must be called with interrupts disabled or with the socket 15541da177e4SLinus Torvalds * locked so that the sk_buff queue operation is ok. 
15551da177e4SLinus Torvalds */ 1556624d1164SChris Leech #ifdef CONFIG_NET_DMA 1557624d1164SChris Leech static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) 1558624d1164SChris Leech { 1559624d1164SChris Leech __skb_unlink(skb, &sk->sk_receive_queue); 1560624d1164SChris Leech if (!copied_early) 1561624d1164SChris Leech __kfree_skb(skb); 1562624d1164SChris Leech else 1563624d1164SChris Leech __skb_queue_tail(&sk->sk_async_wait_queue, skb); 1564624d1164SChris Leech } 1565624d1164SChris Leech #else 1566624d1164SChris Leech static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) 15671da177e4SLinus Torvalds { 15681da177e4SLinus Torvalds __skb_unlink(skb, &sk->sk_receive_queue); 15691da177e4SLinus Torvalds __kfree_skb(skb); 15701da177e4SLinus Torvalds } 1571624d1164SChris Leech #endif 15721da177e4SLinus Torvalds 15733b1e0a65SYOSHIFUJI Hideaki static inline 15743b1e0a65SYOSHIFUJI Hideaki struct net *sock_net(const struct sock *sk) 15753b1e0a65SYOSHIFUJI Hideaki { 15763b1e0a65SYOSHIFUJI Hideaki #ifdef CONFIG_NET_NS 15773b1e0a65SYOSHIFUJI Hideaki return sk->sk_net; 15783b1e0a65SYOSHIFUJI Hideaki #else 15793b1e0a65SYOSHIFUJI Hideaki return &init_net; 15803b1e0a65SYOSHIFUJI Hideaki #endif 15813b1e0a65SYOSHIFUJI Hideaki } 15823b1e0a65SYOSHIFUJI Hideaki 15833b1e0a65SYOSHIFUJI Hideaki static inline 1584f5aa23fdSDenis V. Lunev void sock_net_set(struct sock *sk, struct net *net) 15853b1e0a65SYOSHIFUJI Hideaki { 15863b1e0a65SYOSHIFUJI Hideaki #ifdef CONFIG_NET_NS 15873b1e0a65SYOSHIFUJI Hideaki sk->sk_net = net; 15883b1e0a65SYOSHIFUJI Hideaki #endif 15893b1e0a65SYOSHIFUJI Hideaki } 15903b1e0a65SYOSHIFUJI Hideaki 1591edf02087SDenis V. Lunev /* 1592edf02087SDenis V. Lunev * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. 1593edf02087SDenis V. Lunev * They should not hold a referrence to a namespace in order to allow 1594edf02087SDenis V. Lunev * to stop it. 1595edf02087SDenis V. 
Lunev * Sockets after sk_change_net should be released using sk_release_kernel 1596edf02087SDenis V. Lunev */ 1597edf02087SDenis V. Lunev static inline void sk_change_net(struct sock *sk, struct net *net) 1598edf02087SDenis V. Lunev { 15993b1e0a65SYOSHIFUJI Hideaki put_net(sock_net(sk)); 160065a18ec5SDenis V. Lunev sock_net_set(sk, hold_net(net)); 1601edf02087SDenis V. Lunev } 1602edf02087SDenis V. Lunev 160323542618SKOVACS Krisztian static inline struct sock *skb_steal_sock(struct sk_buff *skb) 160423542618SKOVACS Krisztian { 160523542618SKOVACS Krisztian if (unlikely(skb->sk)) { 160623542618SKOVACS Krisztian struct sock *sk = skb->sk; 160723542618SKOVACS Krisztian 160823542618SKOVACS Krisztian skb->destructor = NULL; 160923542618SKOVACS Krisztian skb->sk = NULL; 161023542618SKOVACS Krisztian return sk; 161123542618SKOVACS Krisztian } 161223542618SKOVACS Krisztian return NULL; 161323542618SKOVACS Krisztian } 161423542618SKOVACS Krisztian 161520d49473SPatrick Ohly extern void sock_enable_timestamp(struct sock *sk, int flag); 16161da177e4SLinus Torvalds extern int sock_get_timestamp(struct sock *, struct timeval __user *); 1617ae40eb1eSEric Dumazet extern int sock_get_timestampns(struct sock *, struct timespec __user *); 16181da177e4SLinus Torvalds 16191da177e4SLinus Torvalds /* 16201da177e4SLinus Torvalds * Enable debug/info messages 16211da177e4SLinus Torvalds */ 1622a2a316fdSStephen Hemminger extern int net_msg_warn; 1623a2a316fdSStephen Hemminger #define NETDEBUG(fmt, args...) \ 1624a2a316fdSStephen Hemminger do { if (net_msg_warn) printk(fmt,##args); } while (0) 16251da177e4SLinus Torvalds 1626a2a316fdSStephen Hemminger #define LIMIT_NETDEBUG(fmt, args...) 
\ 1627a2a316fdSStephen Hemminger do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0) 16281da177e4SLinus Torvalds 16291da177e4SLinus Torvalds extern __u32 sysctl_wmem_max; 16301da177e4SLinus Torvalds extern __u32 sysctl_rmem_max; 16311da177e4SLinus Torvalds 163220380731SArnaldo Carvalho de Melo extern void sk_init(void); 163320380731SArnaldo Carvalho de Melo 16346baf1f41SDavid S. Miller extern int sysctl_optmem_max; 16356baf1f41SDavid S. Miller 163620380731SArnaldo Carvalho de Melo extern __u32 sysctl_wmem_default; 163720380731SArnaldo Carvalho de Melo extern __u32 sysctl_rmem_default; 163820380731SArnaldo Carvalho de Melo 16391da177e4SLinus Torvalds #endif /* _SOCK_H */ 1640