/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the AF_INET socket handler.
 *
 * Version:	@(#)sock.h	1.0.4	05/13/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche <flla@stud.uni-sb.de>
 *
 * Fixes:
 *		Alan Cox	:	Volatiles in skbuff pointers. See
 *					skbuff comments. May be overdone,
 *					better to prove they can be removed
 *					than the reverse.
 *		Alan Cox	:	Added a zapped field for tcp to note
 *					a socket is reset and must stay shut up
 *		Alan Cox	:	New fields for options
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Eliminate low level recv/recvfrom
 *		David S. Miller	:	New socket lookup architecture.
 *		Steve Whitehouse:	Default routines for sock_ops
 *		Arnaldo C. Melo	:	removed net_pinfo, tp_pinfo and made
 *					protinfo be just a void pointer, as the
 *					protocol specific parts were moved to
 *					respective headers and ipv4/v6, etc now
 *					use private slabcaches for their socks
 *		Pedro Hortas	:	New flags field for socket options
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _SOCK_H
#define _SOCK_H

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/timer.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>	/* struct sk_buff */
#include <linux/mm.h>
#include <linux/security.h>

#include <linux/filter.h>
#include <linux/rculist_nulls.h>

#include <asm/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>

/*
 * This structure really needs to be cleaned up.
 * Most of it is for TCP, and not used by any of
 * the other protocols.
 */

/* Define this to get the SOCK_DBG debugging facility. */
#define SOCK_DEBUGGING
#ifdef SOCK_DEBUGGING
#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
					printk(KERN_DEBUG msg); } while (0)
#else
/* Validate arguments and do nothing */
static inline void __attribute__ ((format (printf, 2, 3)))
SOCK_DEBUG(struct sock *sk, const char *msg, ...)
{
}
#endif

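/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * SOCK_DEBUG() takes a socket and a printk()-style format string and only
 * produces output when the socket has the SOCK_DBG flag set, i.e. when the
 * application enabled SO_DEBUG on it.  A protocol might use it like this
 * (the variable names are hypothetical):
 *
 *	SOCK_DEBUG(sk, "%s: queued %d bytes, rcvbuf %d\n",
 *		   __func__, copied, sk->sk_rcvbuf);
 */
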
/* This is the per-socket lock.  The spinlock provides a synchronization
 * between user contexts and software interrupt processing, whereas the
 * mini-semaphore synchronizes multiple users amongst themselves.
 */
typedef struct {
	spinlock_t		slock;
	int			owned;
	wait_queue_head_t	wq;
	/*
	 * We express the mutex-alike socket_lock semantics
	 * to the lock validator by explicitly managing
	 * the slock as a lock variant (in addition to
	 * the slock itself):
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} socket_lock_t;

struct sock;
struct proto;
struct net;

/**
 *	struct sock_common - minimal network layer representation of sockets
 *	@skc_family: network address family
 *	@skc_state: Connection state
 *	@skc_reuse: %SO_REUSEADDR setting
 *	@skc_bound_dev_if: bound device index if != 0
 *	@skc_node: main hash linkage for various protocol lookup tables
 *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
 *	@skc_bind_node: bind hash linkage for various protocol lookup tables
 *	@skc_refcnt: reference count
 *	@skc_hash: hash value used with various protocol lookup tables
 *	@skc_prot: protocol handlers inside a network family
 *	@skc_net: reference to the network namespace of this socket
 *
 *	This is the minimal network layer representation of sockets, the header
 *	for struct sock and struct inet_timewait_sock.
 */
struct sock_common {
	unsigned short		skc_family;
	volatile unsigned char	skc_state;
	unsigned char		skc_reuse;
	int			skc_bound_dev_if;
	union {
		struct hlist_node	skc_node;
		struct hlist_nulls_node	skc_nulls_node;
	};
	struct hlist_node	skc_bind_node;
	atomic_t		skc_refcnt;
	unsigned int		skc_hash;
	struct proto		*skc_prot;
#ifdef CONFIG_NET_NS
	struct net		*skc_net;
#endif
};

/**
 *	struct sock - network layer representation of sockets
 *	@__sk_common: shared layout with inet_timewait_sock
 *	@sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
 *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
 *	@sk_lock: synchronizer
 *	@sk_rcvbuf: size of receive buffer in bytes
 *	@sk_sleep: sock wait queue
 *	@sk_dst_cache: destination cache
 *	@sk_dst_lock: destination cache lock
 *	@sk_policy: flow policy
 *	@sk_rmem_alloc: receive queue bytes committed
 *	@sk_receive_queue: incoming packets
 *	@sk_wmem_alloc: transmit queue bytes committed
 *	@sk_write_queue: Packet sending queue
 *	@sk_async_wait_queue: DMA copied packets
 *	@sk_omem_alloc: "o" is "option" or "other"
 *	@sk_wmem_queued: persistent queue size
 *	@sk_forward_alloc: space allocated forward
 *	@sk_allocation: allocation mode
 *	@sk_sndbuf: size of send buffer in bytes
 *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
 *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
 *	@sk_no_check: %SO_NO_CHECK setting, whether or not to checksum packets
 *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
 *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
 *	@sk_gso_max_size: Maximum GSO segment size to build
 *	@sk_lingertime: %SO_LINGER l_linger setting
 *	@sk_backlog: always used with the per-socket spinlock held
 *	@sk_callback_lock: used with the callbacks in the end of this struct
 *	@sk_error_queue: rarely used
 *	@sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
 *			  IPV6_ADDRFORM for instance)
 *	@sk_err: last error
 *	@sk_err_soft: errors that don't cause failure but are the cause of a
 *		      persistent failure not just 'timed out'
 *	@sk_drops: raw/udp drops counter
 *	@sk_ack_backlog: current listen backlog
 *	@sk_max_ack_backlog: listen backlog set in listen()
 *	@sk_priority: %SO_PRIORITY setting
 *	@sk_type: socket type (%SOCK_STREAM, etc)
 *	@sk_protocol: which protocol this socket belongs to in this network family
 *	@sk_peercred: %SO_PEERCRED setting
 *	@sk_rcvlowat: %SO_RCVLOWAT setting
 *	@sk_rcvtimeo: %SO_RCVTIMEO setting
 *	@sk_sndtimeo: %SO_SNDTIMEO setting
 *	@sk_filter: socket filtering instructions
 *	@sk_protinfo: private area, net family specific, when not using slab
 *	@sk_timer: sock cleanup timer
 *	@sk_stamp: time stamp of last packet received
 *	@sk_socket: Identd and reporting IO signals
 *	@sk_user_data: RPC layer private data
 *	@sk_sndmsg_page: cached page for sendmsg
 *	@sk_sndmsg_off: cached offset for sendmsg
 *	@sk_send_head: front of stuff to transmit
 *	@sk_security: used by security modules
 *	@sk_mark: generic packet mark
 *	@sk_write_pending: a write to stream socket waits to start
 *	@sk_state_change: callback to indicate change in the state of the sock
 *	@sk_data_ready: callback to indicate there is data to be processed
 *	@sk_write_space: callback to indicate there is buffer sending space available
 *	@sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
 *	@sk_backlog_rcv: callback to process the backlog
 *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
 */
struct sock {
	/*
	 * Now struct inet_timewait_sock also uses sock_common, so please just
	 * don't add anything before this first member (__sk_common) --acme
	 */
	struct sock_common	__sk_common;
#define sk_family		__sk_common.skc_family
#define sk_state		__sk_common.skc_state
#define sk_reuse		__sk_common.skc_reuse
#define sk_bound_dev_if		__sk_common.skc_bound_dev_if
#define sk_node			__sk_common.skc_node
#define sk_nulls_node		__sk_common.skc_nulls_node
#define sk_bind_node		__sk_common.skc_bind_node
#define sk_refcnt		__sk_common.skc_refcnt
#define sk_hash			__sk_common.skc_hash
#define sk_prot			__sk_common.skc_prot
#define sk_net			__sk_common.skc_net
	unsigned char		sk_shutdown : 2,
				sk_no_check : 2,
				sk_userlocks : 4;
	unsigned char		sk_protocol;
	unsigned short		sk_type;
	int			sk_rcvbuf;
	socket_lock_t		sk_lock;
	/*
	 * The backlog queue is special, it is always used with
	 * the per-socket spinlock held and requires low latency
	 * access. Therefore we special case its implementation.
	 */
	struct {
		struct sk_buff *head;
		struct sk_buff *tail;
	} sk_backlog;
	wait_queue_head_t	*sk_sleep;
	struct dst_entry	*sk_dst_cache;
#ifdef CONFIG_XFRM
	struct xfrm_policy	*sk_policy[2];
#endif
	rwlock_t		sk_dst_lock;
	atomic_t		sk_rmem_alloc;
	atomic_t		sk_wmem_alloc;
	atomic_t		sk_omem_alloc;
	int			sk_sndbuf;
	struct sk_buff_head	sk_receive_queue;
	struct sk_buff_head	sk_write_queue;
#ifdef CONFIG_NET_DMA
	struct sk_buff_head	sk_async_wait_queue;
#endif
	int			sk_wmem_queued;
	int			sk_forward_alloc;
	gfp_t			sk_allocation;
	int			sk_route_caps;
	int			sk_gso_type;
	unsigned int		sk_gso_max_size;
	int			sk_rcvlowat;
	unsigned long		sk_flags;
	unsigned long		sk_lingertime;
	struct sk_buff_head	sk_error_queue;
	struct proto		*sk_prot_creator;
	rwlock_t		sk_callback_lock;
	int			sk_err,
				sk_err_soft;
	atomic_t		sk_drops;
	unsigned short		sk_ack_backlog;
	unsigned short		sk_max_ack_backlog;
	__u32			sk_priority;
	struct ucred		sk_peercred;
	long			sk_rcvtimeo;
	long			sk_sndtimeo;
	struct sk_filter	*sk_filter;
	void			*sk_protinfo;
	struct timer_list	sk_timer;
	ktime_t			sk_stamp;
	struct socket		*sk_socket;
	void			*sk_user_data;
	struct page		*sk_sndmsg_page;
	struct sk_buff		*sk_send_head;
	__u32			sk_sndmsg_off;
	int			sk_write_pending;
#ifdef CONFIG_SECURITY
	void			*sk_security;
#endif
	__u32			sk_mark;
	/* XXX 4 bytes hole on 64 bit */
	void			(*sk_state_change)(struct sock *sk);
	void			(*sk_data_ready)(struct sock *sk, int bytes);
	void			(*sk_write_space)(struct sock *sk);
	void			(*sk_error_report)(struct sock *sk);
	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
	void			(*sk_destruct)(struct sock *sk);
};

/*
 * Hashed lists helper routines
 */
static inline struct sock *__sk_head(const struct hlist_head *head)
{
	return hlist_entry(head->first, struct sock, sk_node);
}

static inline struct sock *sk_head(const struct hlist_head *head)
{
	return hlist_empty(head) ? NULL : __sk_head(head);
}

static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_entry(head->first, struct sock, sk_nulls_node);
}

static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head);
}

static inline struct sock *sk_next(const struct sock *sk)
{
	return sk->sk_node.next ?
		hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}

static inline struct sock *sk_nulls_next(const struct sock *sk)
{
	return (!is_a_nulls(sk->sk_nulls_node.next)) ?
		hlist_nulls_entry(sk->sk_nulls_node.next,
				  struct sock, sk_nulls_node) :
		NULL;
}

static inline int sk_unhashed(const struct sock *sk)
{
	return hlist_unhashed(&sk->sk_node);
}

static inline int sk_hashed(const struct sock *sk)
{
	return !sk_unhashed(sk);
}

static __inline__ void sk_node_init(struct hlist_node *node)
{
	node->pprev = NULL;
}

static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node)
{
	node->pprev = NULL;
}

static __inline__ void __sk_del_node(struct sock *sk)
{
	__hlist_del(&sk->sk_node);
}

static __inline__ int __sk_del_node_init(struct sock *sk)
{
	if (sk_hashed(sk)) {
		__sk_del_node(sk);
		sk_node_init(&sk->sk_node);
		return 1;
	}
	return 0;
}

/* Grab socket reference count. This operation is valid only
   when sk is ALREADY grabbed f.e. it is found in hash table
   or a list and the lookup is made under lock preventing hash table
   modifications.
 */

static inline void sock_hold(struct sock *sk)
{
	atomic_inc(&sk->sk_refcnt);
}

/* Ungrab socket in the context, which assumes that socket refcnt
   cannot hit zero, f.e. it is true in context of any socketcall.
 */
static inline void __sock_put(struct sock *sk)
{
	atomic_dec(&sk->sk_refcnt);
}

static __inline__ int sk_del_node_init(struct sock *sk)
{
	int rc = __sk_del_node_init(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}

static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
{
	if (sk_hashed(sk)) {
		hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
		return 1;
	}
	return 0;
}

static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk)
{
	int rc = __sk_nulls_del_node_init_rcu(sk);

	if (rc) {
		/* paranoid for a while -acme */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
	return rc;
}

static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
	hlist_add_head(&sk->sk_node, list);
}

static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list)
{
	sock_hold(sk);
	__sk_add_node(sk, list);
}

static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}

static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
	sock_hold(sk);
	__sk_nulls_add_node_rcu(sk, list);
}

static __inline__ void __sk_del_bind_node(struct sock *sk)
{
	__hlist_del(&sk->sk_bind_node);
}

static __inline__ void sk_add_bind_node(struct sock *sk,
					struct hlist_head *list)
{
	hlist_add_head(&sk->sk_bind_node, list);
}

#define sk_for_each(__sk, node, list) \
	hlist_for_each_entry(__sk, node, list, sk_node)
#define sk_nulls_for_each(__sk, node, list) \
	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
#define sk_nulls_for_each_rcu(__sk, node, list) \
	hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
#define sk_for_each_from(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
		hlist_for_each_entry_from(__sk, node, sk_node)
#define sk_nulls_for_each_from(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
#define sk_for_each_continue(__sk, node) \
	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
		hlist_for_each_entry_continue(__sk, node, sk_node)
#define sk_for_each_safe(__sk, node, tmp, list) \
	hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node)
#define sk_for_each_bound(__sk, node, list) \
	hlist_for_each_entry(__sk, node, list, sk_bind_node)

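/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * a protocol lookup typically walks one hash chain with sk_for_each() while
 * holding whatever lock protects that table, and takes a reference with
 * sock_hold() before the lock is dropped.  "table_lock", "head" and
 * "want_hash" below are hypothetical names:
 *
 *	struct sock *sk, *result = NULL;
 *	struct hlist_node *node;
 *
 *	read_lock(&table_lock);
 *	sk_for_each(sk, node, head) {
 *		if (sk->sk_hash == want_hash) {
 *			sock_hold(sk);
 *			result = sk;
 *			break;
 *		}
 *	}
 *	read_unlock(&table_lock);
 */
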
/* Sock flags */
enum sock_flags {
	SOCK_DEAD,
	SOCK_DONE,
	SOCK_URGINLINE,
	SOCK_KEEPOPEN,
	SOCK_LINGER,
	SOCK_DESTROY,
	SOCK_BROADCAST,
	SOCK_TIMESTAMP,
	SOCK_ZAPPED,
	SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
	SOCK_DBG, /* %SO_DEBUG setting */
	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
	SOCK_TIMESTAMPING_TX_HARDWARE,  /* %SOF_TIMESTAMPING_TX_HARDWARE */
	SOCK_TIMESTAMPING_TX_SOFTWARE,  /* %SOF_TIMESTAMPING_TX_SOFTWARE */
	SOCK_TIMESTAMPING_RX_HARDWARE,  /* %SOF_TIMESTAMPING_RX_HARDWARE */
	SOCK_TIMESTAMPING_RX_SOFTWARE,  /* %SOF_TIMESTAMPING_RX_SOFTWARE */
	SOCK_TIMESTAMPING_SOFTWARE,     /* %SOF_TIMESTAMPING_SOFTWARE */
	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
};

static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
{
	nsk->sk_flags = osk->sk_flags;
}

static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
{
	__set_bit(flag, &sk->sk_flags);
}

static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
{
	__clear_bit(flag, &sk->sk_flags);
}

static inline int sock_flag(struct sock *sk, enum sock_flags flag)
{
	return test_bit(flag, &sk->sk_flags);
}

static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog--;
}

static inline void sk_acceptq_added(struct sock *sk)
{
	sk->sk_ack_backlog++;
}

static inline int sk_acceptq_is_full(struct sock *sk)
{
	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
}

/*
 * Compute minimal free write space needed to queue new packets.
 */
static inline int sk_stream_min_wspace(struct sock *sk)
{
	return sk->sk_wmem_queued >> 1;
}

static inline int sk_stream_wspace(struct sock *sk)
{
	return sk->sk_sndbuf - sk->sk_wmem_queued;
}

extern void sk_stream_write_space(struct sock *sk);

static inline int sk_stream_memory_free(struct sock *sk)
{
	return sk->sk_wmem_queued < sk->sk_sndbuf;
}

/* The per-socket spinlock must be held here. */
static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	if (!sk->sk_backlog.tail) {
		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
	} else {
		sk->sk_backlog.tail->next = skb;
		sk->sk_backlog.tail = skb;
	}
	skb->next = NULL;
}

static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	return sk->sk_backlog_rcv(sk, skb);
}

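/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * a protocol's packet input path, running in softirq context, either handles
 * the skb immediately or defers it to the backlog when a process currently
 * owns the socket lock; release_sock() later feeds the deferred skbs back
 * through sk_backlog_rcv().  bh_lock_sock() and sock_owned_by_user() are
 * defined further down in this header; "my_proto_do_rcv" is hypothetical:
 *
 *	bh_lock_sock(sk);
 *	if (!sock_owned_by_user(sk))
 *		my_proto_do_rcv(sk, skb);
 *	else
 *		sk_add_backlog(sk, skb);
 *	bh_unlock_sock(sk);
 */
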
#define sk_wait_event(__sk, __timeo, __condition)			\
	({	int __rc;						\
		release_sock(__sk);					\
		__rc = __condition;					\
		if (!__rc) {						\
			*(__timeo) = schedule_timeout(*(__timeo));	\
		}							\
		lock_sock(__sk);					\
		__rc = __condition;					\
		__rc;							\
	})

extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
extern void sk_stream_kill_queues(struct sock *sk);

extern int sk_wait_data(struct sock *sk, long *timeo);

struct request_sock_ops;
struct timewait_sock_ops;
struct inet_hashinfo;
struct raw_hashinfo;

/* Networking protocol blocks we attach to sockets.
 * socket layer -> transport layer interface
 * transport -> network interface is defined by struct inet_proto
 */
struct proto {
	void			(*close)(struct sock *sk,
					long timeout);
	int			(*connect)(struct sock *sk,
					struct sockaddr *uaddr,
					int addr_len);
	int			(*disconnect)(struct sock *sk, int flags);

	struct sock *		(*accept)(struct sock *sk, int flags, int *err);

	int			(*ioctl)(struct sock *sk, int cmd,
					 unsigned long arg);
	int			(*init)(struct sock *sk);
	void			(*destroy)(struct sock *sk);
	void			(*shutdown)(struct sock *sk, int how);
	int			(*setsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					int optlen);
	int			(*getsockopt)(struct sock *sk, int level,
					int optname, char __user *optval,
					int __user *option);
#ifdef CONFIG_COMPAT
	int			(*compat_setsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					int optlen);
	int			(*compat_getsockopt)(struct sock *sk,
					int level,
					int optname, char __user *optval,
					int __user *option);
#endif
	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg, size_t len);
	int			(*recvmsg)(struct kiocb *iocb, struct sock *sk,
					   struct msghdr *msg,
					   size_t len, int noblock, int flags,
					   int *addr_len);
	int			(*sendpage)(struct sock *sk, struct page *page,
					int offset, size_t size, int flags);
	int			(*bind)(struct sock *sk,
					struct sockaddr *uaddr, int addr_len);

	int			(*backlog_rcv)(struct sock *sk,
					       struct sk_buff *skb);

	/* Keeping track of sk's, looking them up, and port selection methods. */
	void			(*hash)(struct sock *sk);
	void			(*unhash)(struct sock *sk);
	int			(*get_port)(struct sock *sk, unsigned short snum);

	/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
	unsigned int		inuse_idx;
#endif

	/* Memory pressure */
	void			(*enter_memory_pressure)(struct sock *sk);
	atomic_t		*memory_allocated;	/* Current allocated memory. */
	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
	/*
	 * Pressure flag: try to collapse.
	 * Technical note: it is used by multiple contexts non atomically.
	 * All the __sk_mem_schedule() is of this nature: accounting
	 * is strict, actions are advisory and have some latency.
	 */
	int			*memory_pressure;
	int			*sysctl_mem;
	int			*sysctl_wmem;
	int			*sysctl_rmem;
	int			max_header;

	struct kmem_cache	*slab;
	unsigned int		obj_size;
	int			slab_flags;

	struct percpu_counter	*orphan_count;

	struct request_sock_ops	*rsk_prot;
	struct timewait_sock_ops *twsk_prot;

	union {
		struct inet_hashinfo	*hashinfo;
		struct udp_table	*udp_table;
		struct raw_hashinfo	*raw_hash;
	} h;

	struct module		*owner;

	char			name[32];

	struct list_head	node;
#ifdef SOCK_REFCNT_DEBUG
	atomic_t		socks;
#endif
};

extern int proto_register(struct proto *prot, int alloc_slab);
extern void proto_unregister(struct proto *prot);

#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
{
	atomic_inc(&sk->sk_prot->socks);
}

static inline void sk_refcnt_debug_dec(struct sock *sk)
{
	atomic_dec(&sk->sk_prot->socks);
	printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
}

static inline void sk_refcnt_debug_release(const struct sock *sk)
{
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
		       sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
}
#else /* SOCK_REFCNT_DEBUG */
#define sk_refcnt_debug_inc(sk) do { } while (0)
#define sk_refcnt_debug_dec(sk) do { } while (0)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */


#ifdef CONFIG_PROC_FS
/* Called with local bh disabled */
extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
extern int sock_prot_inuse_get(struct net *net, struct proto *proto);
#else
static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
		int inc)
{
}
#endif

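/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * a protocol typically fills in a struct proto and registers it, letting the
 * core allocate a slab cache for its sockets, and keeps the per-namespace
 * "inuse" counters up to date from its ->hash()/->unhash() callbacks.  The
 * "my_proto"/"struct my_sock" names are hypothetical:
 *
 *	static struct proto my_proto = {
 *		.name	  = "MYPROTO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct my_sock),
 *	};
 *
 *	err = proto_register(&my_proto, 1);	// 1 => allocate a slab cache
 *	...
 *	proto_unregister(&my_proto);
 *
 * and from the ->hash() implementation, with BHs disabled as required by
 * sock_prot_inuse_add():
 *
 *	local_bh_disable();
 *	// ... insert sk into the protocol's lookup table ...
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 *	local_bh_enable();
 */
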
/* With per-bucket locks this operation is not atomic, so this
 * version is no worse.
 */
static inline void __sk_prot_rehash(struct sock *sk)
{
	sk->sk_prot->unhash(sk);
	sk->sk_prot->hash(sk);
}

/* About 10 seconds */
#define SOCK_DESTROY_TIME (10*HZ)

/* Ports 0-1023 can't be bound to unless you are superuser */
#define PROT_SOCK	1024

#define SHUTDOWN_MASK	3
#define RCV_SHUTDOWN	1
#define SEND_SHUTDOWN	2

#define SOCK_SNDBUF_LOCK	1
#define SOCK_RCVBUF_LOCK	2
#define SOCK_BINDADDR_LOCK	4
#define SOCK_BINDPORT_LOCK	8

/* sock_iocb: used to kick off async processing of socket ios */
struct sock_iocb {
	struct list_head	list;

	int			flags;
	int			size;
	struct socket		*sock;
	struct sock		*sk;
	struct scm_cookie	*scm;
	struct msghdr		*msg, async_msg;
	struct kiocb		*kiocb;
};

static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb)
{
	return (struct sock_iocb *)iocb->private;
}

static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si)
{
	return si->kiocb;
}

struct socket_alloc {
	struct socket socket;
	struct inode vfs_inode;
};

static inline struct socket *SOCKET_I(struct inode *inode)
{
	return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
}

static inline struct inode *SOCK_INODE(struct socket *socket)
{
	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
}

/*
 * Functions for memory accounting
 */
extern int __sk_mem_schedule(struct sock *sk, int size, int kind);
extern void __sk_mem_reclaim(struct sock *sk);

#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
#define SK_MEM_SEND	0
#define SK_MEM_RECV	1

static inline int sk_mem_pages(int amt)
{
	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
}

static inline int sk_has_account(struct sock *sk)
{
	/* return true if protocol supports memory accounting */
	return !!sk->sk_prot->memory_allocated;
}

static inline int sk_wmem_schedule(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return 1;
	return size <= sk->sk_forward_alloc ||
		__sk_mem_schedule(sk, size, SK_MEM_SEND);
}

static inline int sk_rmem_schedule(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return 1;
	return size <= sk->sk_forward_alloc ||
		__sk_mem_schedule(sk, size, SK_MEM_RECV);
}

static inline void sk_mem_reclaim(struct sock *sk)
{
	if (!sk_has_account(sk))
		return;
	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
		__sk_mem_reclaim(sk);
}

static inline void sk_mem_reclaim_partial(struct sock *sk)
{
	if (!sk_has_account(sk))
		return;
	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
		__sk_mem_reclaim(sk);
}

static inline void sk_mem_charge(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return;
	sk->sk_forward_alloc -= size;
}

static inline void sk_mem_uncharge(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return;
	sk->sk_forward_alloc += size;
}

static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
	sk->sk_wmem_queued -= skb->truesize;
	sk_mem_uncharge(sk, skb->truesize);
	__kfree_skb(skb);
}

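/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * a receive path on a protocol that uses memory accounting first reserves
 * forward-allocated space, then charges the skb's truesize against it (here
 * via skb_set_owner_r(), defined later in this file), and gives surplus
 * quanta back once the queue drains:
 *
 *	if (!sk_rmem_schedule(sk, skb->truesize))
 *		goto drop;			// over the protocol's limits
 *	skb_set_owner_r(skb, sk);		// charges skb->truesize
 *	skb_queue_tail(&sk->sk_receive_queue, skb);
 *	...
 *	sk_mem_reclaim(sk);			// e.g. after the queue empties
 */
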
/* Used by processes to "lock" a socket state, so that
 * interrupts and bottom half handlers won't change it
 * from under us. It essentially blocks any incoming
 * packets, so that we won't get any new data or any
 * packets that change the state of the socket.
 *
 * While locked, BH processing will add new packets to
 * the backlog queue.  This queue is processed by the
 * owner of the socket lock right before it is released.
 *
 * Since ~2.3.5 it is also an exclusive sleep lock serializing
 * accesses from user process context.
 */
#define sock_owned_by_user(sk)	((sk)->sk_lock.owned)

/*
 * Macro so as to not evaluate some arguments when
 * lockdep is not enabled.
 *
 * Mark both the sk_lock and the sk_lock.slock as a
 * per-address-family lock class.
 */
#define sock_lock_init_class_and_name(sk, sname, skey, name, key)	\
do {									\
	sk->sk_lock.owned = 0;						\
	init_waitqueue_head(&sk->sk_lock.wq);				\
	spin_lock_init(&(sk)->sk_lock.slock);				\
	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
			sizeof((sk)->sk_lock));				\
	lockdep_set_class_and_name(&(sk)->sk_lock.slock,		\
			(skey), (sname));				\
	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
} while (0)

extern void lock_sock_nested(struct sock *sk, int subclass);

static inline void lock_sock(struct sock *sk)
{
	lock_sock_nested(sk, 0);
}

extern void release_sock(struct sock *sk);

/* BH context may only use the following locking interface. */
#define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
#define bh_lock_sock_nested(__sk) \
				spin_lock_nested(&((__sk)->sk_lock.slock), \
				SINGLE_DEPTH_NESTING)
#define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))

extern struct sock		*sk_alloc(struct net *net, int family,
					  gfp_t priority,
					  struct proto *prot);
extern void			sk_free(struct sock *sk);
extern void			sk_release_kernel(struct sock *sk);
extern struct sock		*sk_clone(const struct sock *sk,
					  const gfp_t priority);

extern struct sk_buff		*sock_wmalloc(struct sock *sk,
					      unsigned long size, int force,
					      gfp_t priority);
extern struct sk_buff		*sock_rmalloc(struct sock *sk,
					      unsigned long size, int force,
					      gfp_t priority);
extern void			sock_wfree(struct sk_buff *skb);
extern void			sock_rfree(struct sk_buff *skb);

extern int			sock_setsockopt(struct socket *sock, int level,
						int op, char __user *optval,
						int optlen);

extern int			sock_getsockopt(struct socket *sock, int level,
						int op, char __user *optval,
						int __user *optlen);
extern struct sk_buff		*sock_alloc_send_skb(struct sock *sk,
						     unsigned long size,
						     int noblock,
						     int *errcode);
extern struct sk_buff		*sock_alloc_send_pskb(struct sock *sk,
						      unsigned long header_len,
						      unsigned long data_len,
						      int noblock,
						      int *errcode);
extern void *sock_kmalloc(struct sock *sk, int size,
			  gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
extern void sk_send_sigurg(struct sock *sk);

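/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * the process-context side of the socket lock described above.  System call
 * handlers bracket their work with lock_sock()/release_sock(); release_sock()
 * also runs sk_backlog_rcv() on any skbs that softirq processing queued to
 * the backlog while the lock was owned:
 *
 *	lock_sock(sk);
 *	// ... examine and modify socket state; incoming packets are
 *	//     diverted to sk->sk_backlog in the meantime ...
 *	release_sock(sk);
 */
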
/*
 * Functions to fill in entries in struct proto_ops when a protocol
 * does not implement a particular function.
 */
extern int			sock_no_bind(struct socket *,
					     struct sockaddr *, int);
extern int			sock_no_connect(struct socket *,
						struct sockaddr *, int, int);
extern int			sock_no_socketpair(struct socket *,
						   struct socket *);
extern int			sock_no_accept(struct socket *,
					       struct socket *, int);
extern int			sock_no_getname(struct socket *,
						struct sockaddr *, int *, int);
extern unsigned int		sock_no_poll(struct file *, struct socket *,
					     struct poll_table_struct *);
extern int			sock_no_ioctl(struct socket *, unsigned int,
					      unsigned long);
extern int			sock_no_listen(struct socket *, int);
extern int			sock_no_shutdown(struct socket *, int);
extern int			sock_no_getsockopt(struct socket *, int, int,
						   char __user *, int __user *);
extern int			sock_no_setsockopt(struct socket *, int, int,
						   char __user *, int);
extern int			sock_no_sendmsg(struct kiocb *, struct socket *,
						struct msghdr *, size_t);
extern int			sock_no_recvmsg(struct kiocb *, struct socket *,
						struct msghdr *, size_t, int);
extern int			sock_no_mmap(struct file *file,
					     struct socket *sock,
					     struct vm_area_struct *vma);
extern ssize_t			sock_no_sendpage(struct socket *sock,
						 struct page *page,
						 int offset, size_t size,
						 int flags);

/*
 * Functions to fill in entries in struct proto_ops when a protocol
 * uses the inet style.
 */
extern int sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen);
extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size, int flags);
extern int sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen);
extern int compat_sock_common_getsockopt(struct socket *sock, int level,
		int optname, char __user *optval, int __user *optlen);
extern int compat_sock_common_setsockopt(struct socket *sock, int level,
		int optname, char __user *optval, int optlen);

extern void sk_common_release(struct sock *sk);

/*
 *	Default socket callbacks and setup code
 */

/* Initialise core socket variables */
extern void sock_init_data(struct socket *sock, struct sock *sk);

/**
 *	sk_filter_release - Release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */

static inline void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		kfree(fp);
}

static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	unsigned int size = sk_filter_len(fp);

	atomic_sub(size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	atomic_inc(&fp->refcnt);
	atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc);
}

/*
 * Socket reference counting postulates.
 *
 * * Each user of socket SHOULD hold a reference count.
 * * Each access point to socket (a hash table bucket, reference from a list,
 *   running timer, skb in flight) MUST hold a reference count.
 * * When reference count hits 0, it means it will never increase back.
 * * When reference count hits 0, it means that no references from
 *   outside exist to this socket and current process on current CPU
 *   is last user and may/should destroy this socket.
 * * sk_free is called from any context: process, BH, IRQ. When
 *   it is called, socket has no references from outside -> sk_free
 *   may release descendant resources allocated by the socket, but
 *   to the time when it is called, socket is NOT referenced by any
 *   hash tables, lists etc.
 * * Packets, delivered from outside (from network or from another process)
 *   and enqueued on receive/error queues SHOULD NOT grab reference count,
 *   when they sit in queue. Otherwise, packets will be leaked when the
 *   socket is looked up by one CPU and unhashing is done by another CPU.
 *   It is true for udp/raw, netlink (leak to receive and error queues), tcp
 *   (leak to backlog). Packet sockets do all the processing inside
 *   BR_NETPROTO_LOCK, so they do not have this race condition. UNIX sockets
 *   use a separate SMP lock, so they are covered as well.
 */

/* Ungrab socket and destroy it, if it was the last reference. */
static inline void sock_put(struct sock *sk)
{
	if (atomic_dec_and_test(&sk->sk_refcnt))
		sk_free(sk);
}

extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
			  const int nested);

static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
	sk->sk_socket = sock;
}

/* Detach socket from process context.
 * Announce socket dead, detach it from wait queue and inode.
 * Note that the parent inode holds a reference count on this struct sock;
 * we do not release it in this function, because the protocol
 * probably wants some additional cleanups or even continuing
 * to work with this socket (TCP).
 */
static inline void sock_orphan(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sock_set_flag(sk, SOCK_DEAD);
	sk_set_socket(sk, NULL);
	sk->sk_sleep = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static inline void sock_graft(struct sock *sk, struct socket *parent)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_sleep = &parent->wait;
	parent->sk = sk;
	sk_set_socket(sk, parent);
	security_sock_graft(sk, parent);
	write_unlock_bh(&sk->sk_callback_lock);
}

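/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * tying the postulates above to the helpers: a lookup takes a reference
 * (e.g. via sock_hold() inside the lookup function) before the hash lock is
 * dropped, and the caller drops it with sock_put() when done, which frees
 * the socket if that was the last reference.  "my_lookup" is hypothetical:
 *
 *	sk = my_lookup(net, saddr, sport, daddr, dport);
 *	if (sk) {
 *		// ... deliver the packet / use the socket ...
 *		sock_put(sk);
 *	}
 *
 * On the accept() path a protocol typically attaches the new sock to its
 * struct socket with sock_graft(newsk, newsock); on close, sock_orphan(sk)
 * detaches it again before the final sock_put().
 */
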
extern int sock_i_uid(struct sock *sk);
extern unsigned long sock_i_ino(struct sock *sk);

static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
	return sk->sk_dst_cache;
}

static inline struct dst_entry *
sk_dst_get(struct sock *sk)
{
	struct dst_entry *dst;

	read_lock(&sk->sk_dst_lock);
	dst = sk->sk_dst_cache;
	if (dst)
		dst_hold(dst);
	read_unlock(&sk->sk_dst_lock);
	return dst;
}

static inline void
__sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	old_dst = sk->sk_dst_cache;
	sk->sk_dst_cache = dst;
	dst_release(old_dst);
}

static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
	write_lock(&sk->sk_dst_lock);
	__sk_dst_set(sk, dst);
	write_unlock(&sk->sk_dst_lock);
}

static inline void
__sk_dst_reset(struct sock *sk)
{
	struct dst_entry *old_dst;

	old_dst = sk->sk_dst_cache;
	sk->sk_dst_cache = NULL;
	dst_release(old_dst);
}

static inline void
sk_dst_reset(struct sock *sk)
{
	write_lock(&sk->sk_dst_lock);
	__sk_dst_reset(sk);
	write_unlock(&sk->sk_dst_lock);
}

extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);

extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);

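/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * with the socket locked, an output path can reuse the cached route as long
 * as it is still valid; __sk_dst_check() drops and clears an obsolete entry,
 * after which the protocol performs a fresh route lookup and installs the
 * result.  "my_route_output" is hypothetical:
 *
 *	struct dst_entry *dst = __sk_dst_check(sk, 0);
 *
 *	if (!dst) {
 *		dst = my_route_output(sk);	// returns a held dst_entry
 *		if (!dst)
 *			return -EHOSTUNREACH;
 *		sk_dst_set(sk, dst);		// cache takes over the reference
 *	}
 */
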
static inline int sk_can_gso(const struct sock *sk)
{
	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
}

extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);

static inline int skb_copy_to_page(struct sock *sk, char __user *from,
				   struct sk_buff *skb, struct page *page,
				   int off, int copy)
{
	if (skb->ip_summed == CHECKSUM_NONE) {
		int err = 0;
		__wsum csum = csum_and_copy_from_user(from,
						      page_address(page) + off,
						      copy, 0, &err);
		if (err)
			return err;
		skb->csum = csum_block_add(skb->csum, csum, skb->len);
	} else if (copy_from_user(page_address(page) + off, from, copy))
		return -EFAULT;

	skb->len	   += copy;
	skb->data_len	   += copy;
	skb->truesize	   += copy;
	sk->sk_wmem_queued += copy;
	sk_mem_charge(sk, copy);
	return 0;
}

/*
 * Queue a received datagram if it will fit. Stream and sequenced
 * protocols can't normally use this as they need to fit buffers in
 * and play with them.
 *
 * Inlined as it's very short and called for pretty much every
 * packet ever received.
 */

static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	sock_hold(sk);
	skb->sk = sk;
	skb->destructor = sock_wfree;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	skb->sk = sk;
	skb->destructor = sock_rfree;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, skb->truesize);
}

extern void sk_reset_timer(struct sock *sk, struct timer_list *timer,
			   unsigned long expires);

extern void sk_stop_timer(struct sock *sk, struct timer_list *timer);

extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);

static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		return -ENOMEM;
	skb_set_owner_r(skb, sk);
	skb_queue_tail(&sk->sk_error_queue, skb);
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb->len);
	return 0;
}

/*
 *	Recover an error report and clear atomically
 */

static inline int sock_error(struct sock *sk)
{
	int err;
	if (likely(!sk->sk_err))
		return 0;
	err = xchg(&sk->sk_err, 0);
	return -err;
}

static inline unsigned long sock_wspace(struct sock *sk)
{
	int amt = 0;

	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
		if (amt < 0)
			amt = 0;
	}
	return amt;
}

static inline void sk_wake_async(struct sock *sk, int how, int band)
{
	if (sk->sk_socket && sk->sk_socket->fasync_list)
		sock_wake_async(sk->sk_socket, how, band);
}

#define SOCK_MIN_SNDBUF 2048
#define SOCK_MIN_RCVBUF 256

static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
		sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
	}
}

struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);

static inline struct page *sk_stream_alloc_page(struct sock *sk)
{
	struct page *page = NULL;

	page = alloc_pages(sk->sk_allocation, 0);
	if (!page) {
		sk->sk_prot->enter_memory_pressure(sk);
		sk_stream_moderate_sndbuf(sk);
	}
	return page;
}

/*
 *	Default write policy as shown to user space via poll/select/SIGIO
 */
static inline int sock_writeable(const struct sock *sk)
{
	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
}

static inline gfp_t gfp_any(void)
{
	return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}

static inline long sock_rcvtimeo(const struct sock *sk, int noblock)
{
	return noblock ? 0 : sk->sk_rcvtimeo;
}

static inline long sock_sndtimeo(const struct sock *sk, int noblock)
{
	return noblock ? 0 : sk->sk_sndtimeo;
}

static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
	return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
}

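/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * a blocking receive path combines these helpers: sock_rcvtimeo() turns
 * MSG_DONTWAIT into a zero timeout, sock_error() picks up and clears a
 * pending error, sock_intr_errno() (defined just below) maps a signal to
 * -EINTR/-ERESTARTSYS, and sk_wait_data() -- built on sk_wait_event() --
 * sleeps until data arrives or the timeout expires:
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	int err = 0;
 *
 *	lock_sock(sk);
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		err = sock_error(sk);
 *		if (err)
 *			break;
 *		if (!timeo) {
 *			err = -EAGAIN;
 *			break;
 *		}
 *		if (signal_pending(current)) {
 *			err = sock_intr_errno(timeo);
 *			break;
 *		}
 *		sk_wait_data(sk, &timeo);
 *	}
 *	release_sock(sk);
 */
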
/* Alas, with timeout socket operations are not restartable.
 * Compare this to poll().
 */
static inline int sock_intr_errno(long timeo)
{
	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
}

extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb);

static __inline__ void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
	ktime_t kt = skb->tstamp;
	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);

	/*
	 * generate control messages if
	 * - receive time stamping in software requested (SOCK_RCVTSTAMP
	 *   or SOCK_TIMESTAMPING_RX_SOFTWARE)
	 * - software time stamp available and wanted
	 *   (SOCK_TIMESTAMPING_SOFTWARE)
	 * - hardware time stamps available and wanted
	 *   (SOCK_TIMESTAMPING_SYS_HARDWARE or
	 *   SOCK_TIMESTAMPING_RAW_HARDWARE)
	 */
	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
	    sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
	    (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
	    (hwtstamps->hwtstamp.tv64 &&
	     sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
	    (hwtstamps->syststamp.tv64 &&
	     sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
		__sock_recv_timestamp(msg, sk, skb);
	else
		sk->sk_stamp = kt;
}

/**
 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
 * @msg:	outgoing packet
 * @sk:		socket sending this packet
 * @shtx:	filled with instructions for time stamping
 *
 * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if
 * parameters are invalid.
 */
extern int sock_tx_timestamp(struct msghdr *msg,
			     struct sock *sk,
			     union skb_shared_tx *shtx);


/**
 * sk_eat_skb - Release a skb if it is no longer needed
 * @sk: socket to eat this skb from
 * @skb: socket buffer to eat
 * @copied_early: flag indicating whether DMA operations copied this data early
 *
 * This routine must be called with interrupts disabled or with the socket
 * locked so that the sk_buff queue operation is ok.
 */
#ifdef CONFIG_NET_DMA
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
{
	__skb_unlink(skb, &sk->sk_receive_queue);
	if (!copied_early)
		__kfree_skb(skb);
	else
		__skb_queue_tail(&sk->sk_async_wait_queue, skb);
}
#else
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
{
	__skb_unlink(skb, &sk->sk_receive_queue);
	__kfree_skb(skb);
}
#endif

static inline
struct net *sock_net(const struct sock *sk)
{
#ifdef CONFIG_NET_NS
	return sk->sk_net;
#else
	return &init_net;
#endif
}

static inline
void sock_net_set(struct sock *sk, struct net *net)
{
#ifdef CONFIG_NET_NS
	sk->sk_net = net;
#endif
}

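/*
 * Illustrative usage sketch (editorial note, not part of the original header):
 * lookup code generally confines itself to one network namespace by comparing
 * sock_net(sk) against the namespace the packet arrived in (dev_net() and
 * net_eq() come from the net namespace headers):
 *
 *	struct net *net = dev_net(skb->dev);
 *
 *	sk_for_each(sk, node, head) {
 *		if (!net_eq(sock_net(sk), net))
 *			continue;
 *		// ... match addresses and ports ...
 *	}
 */
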
/*
 * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
 * They should not hold a reference to a namespace in order to allow
 * the namespace to be stopped.
 * Sockets after sk_change_net should be released using sk_release_kernel.
 */
static inline void sk_change_net(struct sock *sk, struct net *net)
{
	put_net(sock_net(sk));
	sock_net_set(sk, hold_net(net));
}

static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
	if (unlikely(skb->sk)) {
		struct sock *sk = skb->sk;

		skb->destructor = NULL;
		skb->sk = NULL;
		return sk;
	}
	return NULL;
}

extern void sock_enable_timestamp(struct sock *sk, int flag);
extern int sock_get_timestamp(struct sock *, struct timeval __user *);
extern int sock_get_timestampns(struct sock *, struct timespec __user *);

/*
 *	Enable debug/info messages
 */
extern int net_msg_warn;
#define NETDEBUG(fmt, args...) \
	do { if (net_msg_warn) printk(fmt, ##args); } while (0)

#define LIMIT_NETDEBUG(fmt, args...) \
	do { if (net_msg_warn && net_ratelimit()) printk(fmt, ##args); } while (0)

extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;

extern void sk_init(void);

extern int sysctl_optmem_max;

extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;

#endif	/* _SOCK_H */