1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * Generic socket support routines. Memory allocators, socket lock/release 7 * handler for protocols to use and generic option handler. 8 * 9 * 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Florian La Roche, <flla@stud.uni-sb.de> 13 * Alan Cox, <A.Cox@swansea.ac.uk> 14 * 15 * Fixes: 16 * Alan Cox : Numerous verify_area() problems 17 * Alan Cox : Connecting on a connecting socket 18 * now returns an error for tcp. 19 * Alan Cox : sock->protocol is set correctly. 20 * and is not sometimes left as 0. 21 * Alan Cox : connect handles icmp errors on a 22 * connect properly. Unfortunately there 23 * is a restart syscall nasty there. I 24 * can't match BSD without hacking the C 25 * library. Ideas urgently sought! 26 * Alan Cox : Disallow bind() to addresses that are 27 * not ours - especially broadcast ones!! 28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) 29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, 30 * instead they leave that for the DESTROY timer. 31 * Alan Cox : Clean up error flag in accept 32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer 33 * was buggy. Put a remove_sock() in the handler 34 * for memory when we hit 0. Also altered the timer 35 * code. The ACK stuff can wait and needs major 36 * TCP layer surgery. 37 * Alan Cox : Fixed TCP ack bug, removed remove sock 38 * and fixed timer/inet_bh race. 39 * Alan Cox : Added zapped flag for TCP 40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code 41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb 42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources 43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. 44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... 45 * Rick Sladkey : Relaxed UDP rules for matching packets. 46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support 47 * Pauline Middelink : identd support 48 * Alan Cox : Fixed connect() taking signals I think. 49 * Alan Cox : SO_LINGER supported 50 * Alan Cox : Error reporting fixes 51 * Anonymous : inet_create tidied up (sk->reuse setting) 52 * Alan Cox : inet sockets don't set sk->type! 53 * Alan Cox : Split socket option code 54 * Alan Cox : Callbacks 55 * Alan Cox : Nagle flag for Charles & Johannes stuff 56 * Alex : Removed restriction on inet fioctl 57 * Alan Cox : Splitting INET from NET core 58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() 59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code 60 * Alan Cox : Split IP from generic code 61 * Alan Cox : New kfree_skbmem() 62 * Alan Cox : Make SO_DEBUG superuser only. 63 * Alan Cox : Allow anyone to clear SO_DEBUG 64 * (compatibility fix) 65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. 66 * Alan Cox : Allocator for a socket is settable. 67 * Alan Cox : SO_ERROR includes soft errors. 68 * Alan Cox : Allow NULL arguments on some SO_ opts 69 * Alan Cox : Generic socket allocation to make hooks 70 * easier (suggested by Craig Metz). 71 * Michael Pall : SO_ERROR returns positive errno again 72 * Steve Whitehouse: Added default destructor to free 73 * protocol private data. 
74 * Steve Whitehouse: Added various other default routines 75 * common to several socket families. 76 * Chris Evans : Call suser() check last on F_SETOWN 77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. 78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() 79 * Andi Kleen : Fix write_space callback 80 * Chris Evans : Security fixes - signedness again 81 * Arnaldo C. Melo : cleanups, use skb_queue_purge 82 * 83 * To Fix: 84 * 85 * 86 * This program is free software; you can redistribute it and/or 87 * modify it under the terms of the GNU General Public License 88 * as published by the Free Software Foundation; either version 89 * 2 of the License, or (at your option) any later version. 90 */ 91 92 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 93 94 #include <linux/capability.h> 95 #include <linux/errno.h> 96 #include <linux/errqueue.h> 97 #include <linux/types.h> 98 #include <linux/socket.h> 99 #include <linux/in.h> 100 #include <linux/kernel.h> 101 #include <linux/module.h> 102 #include <linux/proc_fs.h> 103 #include <linux/seq_file.h> 104 #include <linux/sched.h> 105 #include <linux/sched/mm.h> 106 #include <linux/timer.h> 107 #include <linux/string.h> 108 #include <linux/sockios.h> 109 #include <linux/net.h> 110 #include <linux/mm.h> 111 #include <linux/slab.h> 112 #include <linux/interrupt.h> 113 #include <linux/poll.h> 114 #include <linux/tcp.h> 115 #include <linux/init.h> 116 #include <linux/highmem.h> 117 #include <linux/user_namespace.h> 118 #include <linux/static_key.h> 119 #include <linux/memcontrol.h> 120 #include <linux/prefetch.h> 121 122 #include <linux/uaccess.h> 123 124 #include <linux/netdevice.h> 125 #include <net/protocol.h> 126 #include <linux/skbuff.h> 127 #include <net/net_namespace.h> 128 #include <net/request_sock.h> 129 #include <net/sock.h> 130 #include <linux/net_tstamp.h> 131 #include <net/xfrm.h> 132 #include <linux/ipsec.h> 133 #include <net/cls_cgroup.h> 134 #include <net/netprio_cgroup.h> 135 #include <linux/sock_diag.h> 136 137 #include <linux/filter.h> 138 #include <net/sock_reuseport.h> 139 140 #include <trace/events/sock.h> 141 142 #include <net/tcp.h> 143 #include <net/busy_poll.h> 144 145 static DEFINE_MUTEX(proto_list_mutex); 146 static LIST_HEAD(proto_list); 147 148 /** 149 * sk_ns_capable - General socket capability test 150 * @sk: Socket to use a capability on or through 151 * @user_ns: The user namespace of the capability to use 152 * @cap: The capability to use 153 * 154 * Test to see if the opener of the socket had the capability @cap when 155 * the socket was created and whether the current process has it in the 156 * user namespace @user_ns. 157 */ 158 bool sk_ns_capable(const struct sock *sk, 159 struct user_namespace *user_ns, int cap) 160 { 161 return file_ns_capable(sk->sk_socket->file, user_ns, cap) && 162 ns_capable(user_ns, cap); 163 } 164 EXPORT_SYMBOL(sk_ns_capable); 165 166 /** 167 * sk_capable - Socket global capability test 168 * @sk: Socket to use a capability on or through 169 * @cap: The global capability to use 170 * 171 * Test to see if the opener of the socket had the capability @cap when 172 * the socket was created and whether the current process has it in all 173 * user namespaces.
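 *
 * Illustrative sketch (not a quote of any caller in this file): a
 * privileged socket option could be gated on both the creator of the
 * socket and the current caller holding CAP_NET_ADMIN, e.g.
 *
 *	if (!sk_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * sk_net_capable() below performs the same two-sided check, but against
 * the user namespace owning the socket's network namespace rather than
 * the initial user namespace.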
174 */ 175 bool sk_capable(const struct sock *sk, int cap) 176 { 177 return sk_ns_capable(sk, &init_user_ns, cap); 178 } 179 EXPORT_SYMBOL(sk_capable); 180 181 /** 182 * sk_net_capable - Network namespace socket capability test 183 * @sk: Socket to use a capability on or through 184 * @cap: The capability to use 185 * 186 * Test to see if the opener of the socket had the capability @cap when 187 * the socket was created and whether the current process has it over the 188 * network namespace the socket is a member of. 189 */ 190 bool sk_net_capable(const struct sock *sk, int cap) 191 { 192 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); 193 } 194 EXPORT_SYMBOL(sk_net_capable); 195 196 /* 197 * Each address family might have different locking rules, so we have 198 * one slock key per address family and separate keys for internal and 199 * userspace sockets. 200 */ 201 static struct lock_class_key af_family_keys[AF_MAX]; 202 static struct lock_class_key af_family_kern_keys[AF_MAX]; 203 static struct lock_class_key af_family_slock_keys[AF_MAX]; 204 static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; 205 206 /* 207 * Make lock validator output more readable. (we pre-construct these 208 * strings build-time, so that runtime initialization of socket 209 * locks is fast): 210 */ 211 212 #define _sock_locks(x) \ 213 x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ 214 x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ 215 x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ 216 x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ 217 x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ 218 x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ 219 x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ 220 x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ 221 x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ 222 x "27" , x "28" , x "AF_CAN" , \ 223 x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ 224 x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ 225 x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ 226 x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ 227 x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" 228 229 static const char *const af_family_key_strings[AF_MAX+1] = { 230 _sock_locks("sk_lock-") 231 }; 232 static const char *const af_family_slock_key_strings[AF_MAX+1] = { 233 _sock_locks("slock-") 234 }; 235 static const char *const af_family_clock_key_strings[AF_MAX+1] = { 236 _sock_locks("clock-") 237 }; 238 239 static const char *const af_family_kern_key_strings[AF_MAX+1] = { 240 _sock_locks("k-sk_lock-") 241 }; 242 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { 243 _sock_locks("k-slock-") 244 }; 245 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { 246 _sock_locks("k-clock-") 247 }; 248 static const char *const af_family_rlock_key_strings[AF_MAX+1] = { 249 "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" , 250 "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK", 251 "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" , 252 "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" , 253 "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" , 254 "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" , 255 "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" , 256 "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" , 257 "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" , 258 "rlock-27" , "rlock-28" , "rlock-AF_CAN" , 259 "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" , 260 "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , 261 "rlock-AF_IEEE802154", "rlock-AF_CAIF" ,
"rlock-AF_ALG" , 262 "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , 263 "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" 264 }; 265 static const char *const af_family_wlock_key_strings[AF_MAX+1] = { 266 "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , 267 "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK", 268 "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" , 269 "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" , 270 "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" , 271 "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" , 272 "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" , 273 "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" , 274 "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" , 275 "wlock-27" , "wlock-28" , "wlock-AF_CAN" , 276 "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" , 277 "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , 278 "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , 279 "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , 280 "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" 281 }; 282 static const char *const af_family_elock_key_strings[AF_MAX+1] = { 283 "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , 284 "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK", 285 "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" , 286 "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" , 287 "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" , 288 "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" , 289 "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" , 290 "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" , 291 "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" , 292 "elock-27" , "elock-28" , "elock-AF_CAN" , 293 "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" , 294 "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , 295 "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , 296 "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , 297 "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" 298 }; 299 300 /* 301 * sk_callback_lock and sk queues locking rules are per-address-family, 302 * so split the lock classes by using a per-AF key: 303 */ 304 static struct lock_class_key af_callback_keys[AF_MAX]; 305 static struct lock_class_key af_rlock_keys[AF_MAX]; 306 static struct lock_class_key af_wlock_keys[AF_MAX]; 307 static struct lock_class_key af_elock_keys[AF_MAX]; 308 static struct lock_class_key af_kern_callback_keys[AF_MAX]; 309 310 /* Take into consideration the size of the struct sk_buff overhead in the 311 * determination of these values, since that is non-constant across 312 * platforms. This makes socket queueing behavior and performance 313 * not depend upon such differences. 314 */ 315 #define _SK_MEM_PACKETS 256 316 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) 317 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 318 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 319 320 /* Run time adjustable parameters. 
*/ 321 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; 322 EXPORT_SYMBOL(sysctl_wmem_max); 323 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; 324 EXPORT_SYMBOL(sysctl_rmem_max); 325 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; 326 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; 327 328 /* Maximal space eaten by iovec or ancillary data plus some space */ 329 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 330 EXPORT_SYMBOL(sysctl_optmem_max); 331 332 int sysctl_tstamp_allow_data __read_mostly = 1; 333 334 struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; 335 EXPORT_SYMBOL_GPL(memalloc_socks); 336 337 /** 338 * sk_set_memalloc - sets %SOCK_MEMALLOC 339 * @sk: socket to set it on 340 * 341 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. 342 * It's the responsibility of the admin to adjust min_free_kbytes 343 * to meet the requirements 344 */ 345 void sk_set_memalloc(struct sock *sk) 346 { 347 sock_set_flag(sk, SOCK_MEMALLOC); 348 sk->sk_allocation |= __GFP_MEMALLOC; 349 static_key_slow_inc(&memalloc_socks); 350 } 351 EXPORT_SYMBOL_GPL(sk_set_memalloc); 352 353 void sk_clear_memalloc(struct sock *sk) 354 { 355 sock_reset_flag(sk, SOCK_MEMALLOC); 356 sk->sk_allocation &= ~__GFP_MEMALLOC; 357 static_key_slow_dec(&memalloc_socks); 358 359 /* 360 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward 361 * progress of swapping. SOCK_MEMALLOC may be cleared while 362 * it has rmem allocations due to the last swapfile being deactivated 363 * but there is a risk that the socket is unusable due to exceeding 364 * the rmem limits. Reclaim the reserves and obey rmem limits again. 365 */ 366 sk_mem_reclaim(sk); 367 } 368 EXPORT_SYMBOL_GPL(sk_clear_memalloc); 369 370 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) 371 { 372 int ret; 373 unsigned int noreclaim_flag; 374 375 /* these should have been dropped before queueing */ 376 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); 377 378 noreclaim_flag = memalloc_noreclaim_save(); 379 ret = sk->sk_backlog_rcv(sk, skb); 380 memalloc_noreclaim_restore(noreclaim_flag); 381 382 return ret; 383 } 384 EXPORT_SYMBOL(__sk_backlog_rcv); 385 386 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 387 { 388 struct timeval tv; 389 390 if (optlen < sizeof(tv)) 391 return -EINVAL; 392 if (copy_from_user(&tv, optval, sizeof(tv))) 393 return -EFAULT; 394 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) 395 return -EDOM; 396 397 if (tv.tv_sec < 0) { 398 static int warned __read_mostly; 399 400 *timeo_p = 0; 401 if (warned < 10 && net_ratelimit()) { 402 warned++; 403 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", 404 __func__, current->comm, task_pid_nr(current)); 405 } 406 return 0; 407 } 408 *timeo_p = MAX_SCHEDULE_TIMEOUT; 409 if (tv.tv_sec == 0 && tv.tv_usec == 0) 410 return 0; 411 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) 412 *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ); 413 return 0; 414 } 415 416 static void sock_warn_obsolete_bsdism(const char *name) 417 { 418 static int warned; 419 static char warncomm[TASK_COMM_LEN]; 420 if (strcmp(warncomm, current->comm) && warned < 5) { 421 strcpy(warncomm, current->comm); 422 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n", 423 warncomm, name); 424 warned++; 425 } 426 } 427 428 static bool sock_needs_netstamp(const struct sock *sk) 429 { 430 switch (sk->sk_family) { 431 case AF_UNSPEC: 432 case AF_UNIX: 433 return false; 434 default: 435 return true; 436 
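/*
 * Worked example for sock_set_timeout() above (illustrative numbers,
 * assuming HZ == 1000): SO_RCVTIMEO with tv = { .tv_sec = 2,
 * .tv_usec = 500000 } is stored as 2 * HZ + DIV_ROUND_UP(500000, 1000)
 * = 2500 jiffies; tv = {0, 0} means "wait forever" and is kept as
 * MAX_SCHEDULE_TIMEOUT; a negative tv_sec is clamped to 0 with a
 * rate-limited warning.
 */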
} 437 } 438 439 static void sock_disable_timestamp(struct sock *sk, unsigned long flags) 440 { 441 if (sk->sk_flags & flags) { 442 sk->sk_flags &= ~flags; 443 if (sock_needs_netstamp(sk) && 444 !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) 445 net_disable_timestamp(); 446 } 447 } 448 449 450 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 451 { 452 unsigned long flags; 453 struct sk_buff_head *list = &sk->sk_receive_queue; 454 455 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { 456 atomic_inc(&sk->sk_drops); 457 trace_sock_rcvqueue_full(sk, skb); 458 return -ENOMEM; 459 } 460 461 if (!sk_rmem_schedule(sk, skb, skb->truesize)) { 462 atomic_inc(&sk->sk_drops); 463 return -ENOBUFS; 464 } 465 466 skb->dev = NULL; 467 skb_set_owner_r(skb, sk); 468 469 /* we escape from rcu protected region, make sure we dont leak 470 * a norefcounted dst 471 */ 472 skb_dst_force(skb); 473 474 spin_lock_irqsave(&list->lock, flags); 475 sock_skb_set_dropcount(sk, skb); 476 __skb_queue_tail(list, skb); 477 spin_unlock_irqrestore(&list->lock, flags); 478 479 if (!sock_flag(sk, SOCK_DEAD)) 480 sk->sk_data_ready(sk); 481 return 0; 482 } 483 EXPORT_SYMBOL(__sock_queue_rcv_skb); 484 485 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 486 { 487 int err; 488 489 err = sk_filter(sk, skb); 490 if (err) 491 return err; 492 493 return __sock_queue_rcv_skb(sk, skb); 494 } 495 EXPORT_SYMBOL(sock_queue_rcv_skb); 496 497 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, 498 const int nested, unsigned int trim_cap, bool refcounted) 499 { 500 int rc = NET_RX_SUCCESS; 501 502 if (sk_filter_trim_cap(sk, skb, trim_cap)) 503 goto discard_and_relse; 504 505 skb->dev = NULL; 506 507 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { 508 atomic_inc(&sk->sk_drops); 509 goto discard_and_relse; 510 } 511 if (nested) 512 bh_lock_sock_nested(sk); 513 else 514 bh_lock_sock(sk); 515 if (!sock_owned_by_user(sk)) { 516 /* 517 * trylock + unlock semantics: 518 */ 519 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); 520 521 rc = sk_backlog_rcv(sk, skb); 522 523 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 524 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { 525 bh_unlock_sock(sk); 526 atomic_inc(&sk->sk_drops); 527 goto discard_and_relse; 528 } 529 530 bh_unlock_sock(sk); 531 out: 532 if (refcounted) 533 sock_put(sk); 534 return rc; 535 discard_and_relse: 536 kfree_skb(skb); 537 goto out; 538 } 539 EXPORT_SYMBOL(__sk_receive_skb); 540 541 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 542 { 543 struct dst_entry *dst = __sk_dst_get(sk); 544 545 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 546 sk_tx_queue_clear(sk); 547 sk->sk_dst_pending_confirm = 0; 548 RCU_INIT_POINTER(sk->sk_dst_cache, NULL); 549 dst_release(dst); 550 return NULL; 551 } 552 553 return dst; 554 } 555 EXPORT_SYMBOL(__sk_dst_check); 556 557 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) 558 { 559 struct dst_entry *dst = sk_dst_get(sk); 560 561 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 562 sk_dst_reset(sk); 563 dst_release(dst); 564 return NULL; 565 } 566 567 return dst; 568 } 569 EXPORT_SYMBOL(sk_dst_check); 570 571 static int sock_setbindtodevice(struct sock *sk, char __user *optval, 572 int optlen) 573 { 574 int ret = -ENOPROTOOPT; 575 #ifdef CONFIG_NETDEVICES 576 struct net *net = sock_net(sk); 577 char devname[IFNAMSIZ]; 578 int index; 579 580 /* Sorry... 
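 *
 * (Userspace view, for illustration only: binding a socket to "eth0"
 *  is a plain setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 5),
 *  and passing an empty name or a zero option length unbinds it again.
 *  The CAP_NET_RAW check below is why unprivileged callers see EPERM.)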
*/ 581 ret = -EPERM; 582 if (!ns_capable(net->user_ns, CAP_NET_RAW)) 583 goto out; 584 585 ret = -EINVAL; 586 if (optlen < 0) 587 goto out; 588 589 /* Bind this socket to a particular device like "eth0", 590 * as specified in the passed interface name. If the 591 * name is "" or the option length is zero the socket 592 * is not bound. 593 */ 594 if (optlen > IFNAMSIZ - 1) 595 optlen = IFNAMSIZ - 1; 596 memset(devname, 0, sizeof(devname)); 597 598 ret = -EFAULT; 599 if (copy_from_user(devname, optval, optlen)) 600 goto out; 601 602 index = 0; 603 if (devname[0] != '\0') { 604 struct net_device *dev; 605 606 rcu_read_lock(); 607 dev = dev_get_by_name_rcu(net, devname); 608 if (dev) 609 index = dev->ifindex; 610 rcu_read_unlock(); 611 ret = -ENODEV; 612 if (!dev) 613 goto out; 614 } 615 616 lock_sock(sk); 617 sk->sk_bound_dev_if = index; 618 sk_dst_reset(sk); 619 release_sock(sk); 620 621 ret = 0; 622 623 out: 624 #endif 625 626 return ret; 627 } 628 629 static int sock_getbindtodevice(struct sock *sk, char __user *optval, 630 int __user *optlen, int len) 631 { 632 int ret = -ENOPROTOOPT; 633 #ifdef CONFIG_NETDEVICES 634 struct net *net = sock_net(sk); 635 char devname[IFNAMSIZ]; 636 637 if (sk->sk_bound_dev_if == 0) { 638 len = 0; 639 goto zero; 640 } 641 642 ret = -EINVAL; 643 if (len < IFNAMSIZ) 644 goto out; 645 646 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if); 647 if (ret) 648 goto out; 649 650 len = strlen(devname) + 1; 651 652 ret = -EFAULT; 653 if (copy_to_user(optval, devname, len)) 654 goto out; 655 656 zero: 657 ret = -EFAULT; 658 if (put_user(len, optlen)) 659 goto out; 660 661 ret = 0; 662 663 out: 664 #endif 665 666 return ret; 667 } 668 669 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) 670 { 671 if (valbool) 672 sock_set_flag(sk, bit); 673 else 674 sock_reset_flag(sk, bit); 675 } 676 677 bool sk_mc_loop(struct sock *sk) 678 { 679 if (dev_recursion_level()) 680 return false; 681 if (!sk) 682 return true; 683 switch (sk->sk_family) { 684 case AF_INET: 685 return inet_sk(sk)->mc_loop; 686 #if IS_ENABLED(CONFIG_IPV6) 687 case AF_INET6: 688 return inet6_sk(sk)->mc_loop; 689 #endif 690 } 691 WARN_ON(1); 692 return true; 693 } 694 EXPORT_SYMBOL(sk_mc_loop); 695 696 /* 697 * This is meant for all protocols to use and covers goings on 698 * at the socket level. Everything here is generic. 699 */ 700 701 int sock_setsockopt(struct socket *sock, int level, int optname, 702 char __user *optval, unsigned int optlen) 703 { 704 struct sock *sk = sock->sk; 705 int val; 706 int valbool; 707 struct linger ling; 708 int ret = 0; 709 710 /* 711 * Options without arguments 712 */ 713 714 if (optname == SO_BINDTODEVICE) 715 return sock_setbindtodevice(sk, optval, optlen); 716 717 if (optlen < sizeof(int)) 718 return -EINVAL; 719 720 if (get_user(val, (int __user *)optval)) 721 return -EFAULT; 722 723 valbool = val ? 1 : 0; 724 725 lock_sock(sk); 726 727 switch (optname) { 728 case SO_DEBUG: 729 if (val && !capable(CAP_NET_ADMIN)) 730 ret = -EACCES; 731 else 732 sock_valbool_flag(sk, SOCK_DBG, valbool); 733 break; 734 case SO_REUSEADDR: 735 sk->sk_reuse = (valbool ? 
SK_CAN_REUSE : SK_NO_REUSE); 736 break; 737 case SO_REUSEPORT: 738 sk->sk_reuseport = valbool; 739 break; 740 case SO_TYPE: 741 case SO_PROTOCOL: 742 case SO_DOMAIN: 743 case SO_ERROR: 744 ret = -ENOPROTOOPT; 745 break; 746 case SO_DONTROUTE: 747 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); 748 break; 749 case SO_BROADCAST: 750 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 751 break; 752 case SO_SNDBUF: 753 /* Don't error on this BSD doesn't and if you think 754 * about it this is right. Otherwise apps have to 755 * play 'guess the biggest size' games. RCVBUF/SNDBUF 756 * are treated in BSD as hints 757 */ 758 val = min_t(u32, val, sysctl_wmem_max); 759 set_sndbuf: 760 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 761 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); 762 /* Wake up sending tasks if we upped the value. */ 763 sk->sk_write_space(sk); 764 break; 765 766 case SO_SNDBUFFORCE: 767 if (!capable(CAP_NET_ADMIN)) { 768 ret = -EPERM; 769 break; 770 } 771 goto set_sndbuf; 772 773 case SO_RCVBUF: 774 /* Don't error on this BSD doesn't and if you think 775 * about it this is right. Otherwise apps have to 776 * play 'guess the biggest size' games. RCVBUF/SNDBUF 777 * are treated in BSD as hints 778 */ 779 val = min_t(u32, val, sysctl_rmem_max); 780 set_rcvbuf: 781 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 782 /* 783 * We double it on the way in to account for 784 * "struct sk_buff" etc. overhead. Applications 785 * assume that the SO_RCVBUF setting they make will 786 * allow that much actual data to be received on that 787 * socket. 788 * 789 * Applications are unaware that "struct sk_buff" and 790 * other overheads allocate from the receive buffer 791 * during socket buffer allocation. 792 * 793 * And after considering the possible alternatives, 794 * returning the value we actually used in getsockopt 795 * is the most desirable behavior. 
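 *
 * Worked example (illustrative): setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
 * &(int){ 65536 }, sizeof(int)) first caps the request at
 * sysctl_rmem_max, then stores 131072 in sk->sk_rcvbuf (never less
 * than SOCK_MIN_RCVBUF), and a later getsockopt(SO_RCVBUF) reports
 * that doubled value back.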
796 */ 797 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); 798 break; 799 800 case SO_RCVBUFFORCE: 801 if (!capable(CAP_NET_ADMIN)) { 802 ret = -EPERM; 803 break; 804 } 805 goto set_rcvbuf; 806 807 case SO_KEEPALIVE: 808 if (sk->sk_prot->keepalive) 809 sk->sk_prot->keepalive(sk, valbool); 810 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 811 break; 812 813 case SO_OOBINLINE: 814 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 815 break; 816 817 case SO_NO_CHECK: 818 sk->sk_no_check_tx = valbool; 819 break; 820 821 case SO_PRIORITY: 822 if ((val >= 0 && val <= 6) || 823 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 824 sk->sk_priority = val; 825 else 826 ret = -EPERM; 827 break; 828 829 case SO_LINGER: 830 if (optlen < sizeof(ling)) { 831 ret = -EINVAL; /* 1003.1g */ 832 break; 833 } 834 if (copy_from_user(&ling, optval, sizeof(ling))) { 835 ret = -EFAULT; 836 break; 837 } 838 if (!ling.l_onoff) 839 sock_reset_flag(sk, SOCK_LINGER); 840 else { 841 #if (BITS_PER_LONG == 32) 842 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 843 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 844 else 845 #endif 846 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; 847 sock_set_flag(sk, SOCK_LINGER); 848 } 849 break; 850 851 case SO_BSDCOMPAT: 852 sock_warn_obsolete_bsdism("setsockopt"); 853 break; 854 855 case SO_PASSCRED: 856 if (valbool) 857 set_bit(SOCK_PASSCRED, &sock->flags); 858 else 859 clear_bit(SOCK_PASSCRED, &sock->flags); 860 break; 861 862 case SO_TIMESTAMP: 863 case SO_TIMESTAMPNS: 864 if (valbool) { 865 if (optname == SO_TIMESTAMP) 866 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 867 else 868 sock_set_flag(sk, SOCK_RCVTSTAMPNS); 869 sock_set_flag(sk, SOCK_RCVTSTAMP); 870 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 871 } else { 872 sock_reset_flag(sk, SOCK_RCVTSTAMP); 873 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 874 } 875 break; 876 877 case SO_TIMESTAMPING: 878 if (val & ~SOF_TIMESTAMPING_MASK) { 879 ret = -EINVAL; 880 break; 881 } 882 883 if (val & SOF_TIMESTAMPING_OPT_ID && 884 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { 885 if (sk->sk_protocol == IPPROTO_TCP && 886 sk->sk_type == SOCK_STREAM) { 887 if ((1 << sk->sk_state) & 888 (TCPF_CLOSE | TCPF_LISTEN)) { 889 ret = -EINVAL; 890 break; 891 } 892 sk->sk_tskey = tcp_sk(sk)->snd_una; 893 } else { 894 sk->sk_tskey = 0; 895 } 896 } 897 898 if (val & SOF_TIMESTAMPING_OPT_STATS && 899 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) { 900 ret = -EINVAL; 901 break; 902 } 903 904 sk->sk_tsflags = val; 905 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 906 sock_enable_timestamp(sk, 907 SOCK_TIMESTAMPING_RX_SOFTWARE); 908 else 909 sock_disable_timestamp(sk, 910 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 911 break; 912 913 case SO_RCVLOWAT: 914 if (val < 0) 915 val = INT_MAX; 916 sk->sk_rcvlowat = val ? 
: 1; 917 break; 918 919 case SO_RCVTIMEO: 920 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); 921 break; 922 923 case SO_SNDTIMEO: 924 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); 925 break; 926 927 case SO_ATTACH_FILTER: 928 ret = -EINVAL; 929 if (optlen == sizeof(struct sock_fprog)) { 930 struct sock_fprog fprog; 931 932 ret = -EFAULT; 933 if (copy_from_user(&fprog, optval, sizeof(fprog))) 934 break; 935 936 ret = sk_attach_filter(&fprog, sk); 937 } 938 break; 939 940 case SO_ATTACH_BPF: 941 ret = -EINVAL; 942 if (optlen == sizeof(u32)) { 943 u32 ufd; 944 945 ret = -EFAULT; 946 if (copy_from_user(&ufd, optval, sizeof(ufd))) 947 break; 948 949 ret = sk_attach_bpf(ufd, sk); 950 } 951 break; 952 953 case SO_ATTACH_REUSEPORT_CBPF: 954 ret = -EINVAL; 955 if (optlen == sizeof(struct sock_fprog)) { 956 struct sock_fprog fprog; 957 958 ret = -EFAULT; 959 if (copy_from_user(&fprog, optval, sizeof(fprog))) 960 break; 961 962 ret = sk_reuseport_attach_filter(&fprog, sk); 963 } 964 break; 965 966 case SO_ATTACH_REUSEPORT_EBPF: 967 ret = -EINVAL; 968 if (optlen == sizeof(u32)) { 969 u32 ufd; 970 971 ret = -EFAULT; 972 if (copy_from_user(&ufd, optval, sizeof(ufd))) 973 break; 974 975 ret = sk_reuseport_attach_bpf(ufd, sk); 976 } 977 break; 978 979 case SO_DETACH_FILTER: 980 ret = sk_detach_filter(sk); 981 break; 982 983 case SO_LOCK_FILTER: 984 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) 985 ret = -EPERM; 986 else 987 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); 988 break; 989 990 case SO_PASSSEC: 991 if (valbool) 992 set_bit(SOCK_PASSSEC, &sock->flags); 993 else 994 clear_bit(SOCK_PASSSEC, &sock->flags); 995 break; 996 case SO_MARK: 997 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 998 ret = -EPERM; 999 else 1000 sk->sk_mark = val; 1001 break; 1002 1003 case SO_RXQ_OVFL: 1004 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); 1005 break; 1006 1007 case SO_WIFI_STATUS: 1008 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); 1009 break; 1010 1011 case SO_PEEK_OFF: 1012 if (sock->ops->set_peek_off) 1013 ret = sock->ops->set_peek_off(sk, val); 1014 else 1015 ret = -EOPNOTSUPP; 1016 break; 1017 1018 case SO_NOFCS: 1019 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 1020 break; 1021 1022 case SO_SELECT_ERR_QUEUE: 1023 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); 1024 break; 1025 1026 #ifdef CONFIG_NET_RX_BUSY_POLL 1027 case SO_BUSY_POLL: 1028 /* allow unprivileged users to decrease the value */ 1029 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) 1030 ret = -EPERM; 1031 else { 1032 if (val < 0) 1033 ret = -EINVAL; 1034 else 1035 sk->sk_ll_usec = val; 1036 } 1037 break; 1038 #endif 1039 1040 case SO_MAX_PACING_RATE: 1041 if (val != ~0U) 1042 cmpxchg(&sk->sk_pacing_status, 1043 SK_PACING_NONE, 1044 SK_PACING_NEEDED); 1045 sk->sk_max_pacing_rate = val; 1046 sk->sk_pacing_rate = min(sk->sk_pacing_rate, 1047 sk->sk_max_pacing_rate); 1048 break; 1049 1050 case SO_INCOMING_CPU: 1051 sk->sk_incoming_cpu = val; 1052 break; 1053 1054 case SO_CNX_ADVICE: 1055 if (val == 1) 1056 dst_negative_advice(sk); 1057 break; 1058 default: 1059 ret = -ENOPROTOOPT; 1060 break; 1061 } 1062 release_sock(sk); 1063 return ret; 1064 } 1065 EXPORT_SYMBOL(sock_setsockopt); 1066 1067 1068 static void cred_to_ucred(struct pid *pid, const struct cred *cred, 1069 struct ucred *ucred) 1070 { 1071 ucred->pid = pid_vnr(pid); 1072 ucred->uid = ucred->gid = -1; 1073 if (cred) { 1074 struct user_namespace *current_ns = current_user_ns(); 1075 1076 ucred->uid = from_kuid_munged(current_ns, 
cred->euid); 1077 ucred->gid = from_kgid_munged(current_ns, cred->egid); 1078 } 1079 } 1080 1081 static int groups_to_user(gid_t __user *dst, const struct group_info *src) 1082 { 1083 struct user_namespace *user_ns = current_user_ns(); 1084 int i; 1085 1086 for (i = 0; i < src->ngroups; i++) 1087 if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) 1088 return -EFAULT; 1089 1090 return 0; 1091 } 1092 1093 int sock_getsockopt(struct socket *sock, int level, int optname, 1094 char __user *optval, int __user *optlen) 1095 { 1096 struct sock *sk = sock->sk; 1097 1098 union { 1099 int val; 1100 u64 val64; 1101 struct linger ling; 1102 struct timeval tm; 1103 } v; 1104 1105 int lv = sizeof(int); 1106 int len; 1107 1108 if (get_user(len, optlen)) 1109 return -EFAULT; 1110 if (len < 0) 1111 return -EINVAL; 1112 1113 memset(&v, 0, sizeof(v)); 1114 1115 switch (optname) { 1116 case SO_DEBUG: 1117 v.val = sock_flag(sk, SOCK_DBG); 1118 break; 1119 1120 case SO_DONTROUTE: 1121 v.val = sock_flag(sk, SOCK_LOCALROUTE); 1122 break; 1123 1124 case SO_BROADCAST: 1125 v.val = sock_flag(sk, SOCK_BROADCAST); 1126 break; 1127 1128 case SO_SNDBUF: 1129 v.val = sk->sk_sndbuf; 1130 break; 1131 1132 case SO_RCVBUF: 1133 v.val = sk->sk_rcvbuf; 1134 break; 1135 1136 case SO_REUSEADDR: 1137 v.val = sk->sk_reuse; 1138 break; 1139 1140 case SO_REUSEPORT: 1141 v.val = sk->sk_reuseport; 1142 break; 1143 1144 case SO_KEEPALIVE: 1145 v.val = sock_flag(sk, SOCK_KEEPOPEN); 1146 break; 1147 1148 case SO_TYPE: 1149 v.val = sk->sk_type; 1150 break; 1151 1152 case SO_PROTOCOL: 1153 v.val = sk->sk_protocol; 1154 break; 1155 1156 case SO_DOMAIN: 1157 v.val = sk->sk_family; 1158 break; 1159 1160 case SO_ERROR: 1161 v.val = -sock_error(sk); 1162 if (v.val == 0) 1163 v.val = xchg(&sk->sk_err_soft, 0); 1164 break; 1165 1166 case SO_OOBINLINE: 1167 v.val = sock_flag(sk, SOCK_URGINLINE); 1168 break; 1169 1170 case SO_NO_CHECK: 1171 v.val = sk->sk_no_check_tx; 1172 break; 1173 1174 case SO_PRIORITY: 1175 v.val = sk->sk_priority; 1176 break; 1177 1178 case SO_LINGER: 1179 lv = sizeof(v.ling); 1180 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); 1181 v.ling.l_linger = sk->sk_lingertime / HZ; 1182 break; 1183 1184 case SO_BSDCOMPAT: 1185 sock_warn_obsolete_bsdism("getsockopt"); 1186 break; 1187 1188 case SO_TIMESTAMP: 1189 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && 1190 !sock_flag(sk, SOCK_RCVTSTAMPNS); 1191 break; 1192 1193 case SO_TIMESTAMPNS: 1194 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); 1195 break; 1196 1197 case SO_TIMESTAMPING: 1198 v.val = sk->sk_tsflags; 1199 break; 1200 1201 case SO_RCVTIMEO: 1202 lv = sizeof(struct timeval); 1203 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 1204 v.tm.tv_sec = 0; 1205 v.tm.tv_usec = 0; 1206 } else { 1207 v.tm.tv_sec = sk->sk_rcvtimeo / HZ; 1208 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ; 1209 } 1210 break; 1211 1212 case SO_SNDTIMEO: 1213 lv = sizeof(struct timeval); 1214 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 1215 v.tm.tv_sec = 0; 1216 v.tm.tv_usec = 0; 1217 } else { 1218 v.tm.tv_sec = sk->sk_sndtimeo / HZ; 1219 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ; 1220 } 1221 break; 1222 1223 case SO_RCVLOWAT: 1224 v.val = sk->sk_rcvlowat; 1225 break; 1226 1227 case SO_SNDLOWAT: 1228 v.val = 1; 1229 break; 1230 1231 case SO_PASSCRED: 1232 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); 1233 break; 1234 1235 case SO_PEERCRED: 1236 { 1237 struct ucred peercred; 1238 if (len > sizeof(peercred)) 1239 len = sizeof(peercred); 1240 cred_to_ucred(sk->sk_peer_pid, 
sk->sk_peer_cred, &peercred); 1241 if (copy_to_user(optval, &peercred, len)) 1242 return -EFAULT; 1243 goto lenout; 1244 } 1245 1246 case SO_PEERGROUPS: 1247 { 1248 int ret, n; 1249 1250 if (!sk->sk_peer_cred) 1251 return -ENODATA; 1252 1253 n = sk->sk_peer_cred->group_info->ngroups; 1254 if (len < n * sizeof(gid_t)) { 1255 len = n * sizeof(gid_t); 1256 return put_user(len, optlen) ? -EFAULT : -ERANGE; 1257 } 1258 len = n * sizeof(gid_t); 1259 1260 ret = groups_to_user((gid_t __user *)optval, 1261 sk->sk_peer_cred->group_info); 1262 if (ret) 1263 return ret; 1264 goto lenout; 1265 } 1266 1267 case SO_PEERNAME: 1268 { 1269 char address[128]; 1270 1271 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) 1272 return -ENOTCONN; 1273 if (lv < len) 1274 return -EINVAL; 1275 if (copy_to_user(optval, address, len)) 1276 return -EFAULT; 1277 goto lenout; 1278 } 1279 1280 /* Dubious BSD thing... Probably nobody even uses it, but 1281 * the UNIX standard wants it for whatever reason... -DaveM 1282 */ 1283 case SO_ACCEPTCONN: 1284 v.val = sk->sk_state == TCP_LISTEN; 1285 break; 1286 1287 case SO_PASSSEC: 1288 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); 1289 break; 1290 1291 case SO_PEERSEC: 1292 return security_socket_getpeersec_stream(sock, optval, optlen, len); 1293 1294 case SO_MARK: 1295 v.val = sk->sk_mark; 1296 break; 1297 1298 case SO_RXQ_OVFL: 1299 v.val = sock_flag(sk, SOCK_RXQ_OVFL); 1300 break; 1301 1302 case SO_WIFI_STATUS: 1303 v.val = sock_flag(sk, SOCK_WIFI_STATUS); 1304 break; 1305 1306 case SO_PEEK_OFF: 1307 if (!sock->ops->set_peek_off) 1308 return -EOPNOTSUPP; 1309 1310 v.val = sk->sk_peek_off; 1311 break; 1312 case SO_NOFCS: 1313 v.val = sock_flag(sk, SOCK_NOFCS); 1314 break; 1315 1316 case SO_BINDTODEVICE: 1317 return sock_getbindtodevice(sk, optval, optlen, len); 1318 1319 case SO_GET_FILTER: 1320 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); 1321 if (len < 0) 1322 return len; 1323 1324 goto lenout; 1325 1326 case SO_LOCK_FILTER: 1327 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1328 break; 1329 1330 case SO_BPF_EXTENSIONS: 1331 v.val = bpf_tell_extensions(); 1332 break; 1333 1334 case SO_SELECT_ERR_QUEUE: 1335 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); 1336 break; 1337 1338 #ifdef CONFIG_NET_RX_BUSY_POLL 1339 case SO_BUSY_POLL: 1340 v.val = sk->sk_ll_usec; 1341 break; 1342 #endif 1343 1344 case SO_MAX_PACING_RATE: 1345 v.val = sk->sk_max_pacing_rate; 1346 break; 1347 1348 case SO_INCOMING_CPU: 1349 v.val = sk->sk_incoming_cpu; 1350 break; 1351 1352 case SO_MEMINFO: 1353 { 1354 u32 meminfo[SK_MEMINFO_VARS]; 1355 1356 if (get_user(len, optlen)) 1357 return -EFAULT; 1358 1359 sk_get_meminfo(sk, meminfo); 1360 1361 len = min_t(unsigned int, len, sizeof(meminfo)); 1362 if (copy_to_user(optval, &meminfo, len)) 1363 return -EFAULT; 1364 1365 goto lenout; 1366 } 1367 1368 #ifdef CONFIG_NET_RX_BUSY_POLL 1369 case SO_INCOMING_NAPI_ID: 1370 v.val = READ_ONCE(sk->sk_napi_id); 1371 1372 /* aggregate non-NAPI IDs down to 0 */ 1373 if (v.val < MIN_NAPI_ID) 1374 v.val = 0; 1375 1376 break; 1377 #endif 1378 1379 case SO_COOKIE: 1380 lv = sizeof(u64); 1381 if (len < lv) 1382 return -EINVAL; 1383 v.val64 = sock_gen_cookie(sk); 1384 break; 1385 1386 default: 1387 /* We implement the SO_SNDLOWAT etc to not be settable 1388 * (1003.1g 7). 
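 *
 * (Length handling for the cases above, illustrative: a caller passing
 *  optlen = 256 for SO_RCVBUF has len clamped to sizeof(int), gets that
 *  many bytes of v copied out and 4 written back through optlen;
 *  variable-size options such as SO_PEERCRED, SO_PEERGROUPS and
 *  SO_MEMINFO jump to lenout after copying their own structures.)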
1389 */ 1390 return -ENOPROTOOPT; 1391 } 1392 1393 if (len > lv) 1394 len = lv; 1395 if (copy_to_user(optval, &v, len)) 1396 return -EFAULT; 1397 lenout: 1398 if (put_user(len, optlen)) 1399 return -EFAULT; 1400 return 0; 1401 } 1402 1403 /* 1404 * Initialize an sk_lock. 1405 * 1406 * (We also register the sk_lock with the lock validator.) 1407 */ 1408 static inline void sock_lock_init(struct sock *sk) 1409 { 1410 if (sk->sk_kern_sock) 1411 sock_lock_init_class_and_name( 1412 sk, 1413 af_family_kern_slock_key_strings[sk->sk_family], 1414 af_family_kern_slock_keys + sk->sk_family, 1415 af_family_kern_key_strings[sk->sk_family], 1416 af_family_kern_keys + sk->sk_family); 1417 else 1418 sock_lock_init_class_and_name( 1419 sk, 1420 af_family_slock_key_strings[sk->sk_family], 1421 af_family_slock_keys + sk->sk_family, 1422 af_family_key_strings[sk->sk_family], 1423 af_family_keys + sk->sk_family); 1424 } 1425 1426 /* 1427 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, 1428 * even temporarly, because of RCU lookups. sk_node should also be left as is. 1429 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end 1430 */ 1431 static void sock_copy(struct sock *nsk, const struct sock *osk) 1432 { 1433 #ifdef CONFIG_SECURITY_NETWORK 1434 void *sptr = nsk->sk_security; 1435 #endif 1436 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); 1437 1438 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, 1439 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); 1440 1441 #ifdef CONFIG_SECURITY_NETWORK 1442 nsk->sk_security = sptr; 1443 security_sk_clone(osk, nsk); 1444 #endif 1445 } 1446 1447 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, 1448 int family) 1449 { 1450 struct sock *sk; 1451 struct kmem_cache *slab; 1452 1453 slab = prot->slab; 1454 if (slab != NULL) { 1455 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); 1456 if (!sk) 1457 return sk; 1458 if (priority & __GFP_ZERO) 1459 sk_prot_clear_nulls(sk, prot->obj_size); 1460 } else 1461 sk = kmalloc(prot->obj_size, priority); 1462 1463 if (sk != NULL) { 1464 kmemcheck_annotate_bitfield(sk, flags); 1465 1466 if (security_sk_alloc(sk, family, priority)) 1467 goto out_free; 1468 1469 if (!try_module_get(prot->owner)) 1470 goto out_free_sec; 1471 sk_tx_queue_clear(sk); 1472 } 1473 1474 return sk; 1475 1476 out_free_sec: 1477 security_sk_free(sk); 1478 out_free: 1479 if (slab != NULL) 1480 kmem_cache_free(slab, sk); 1481 else 1482 kfree(sk); 1483 return NULL; 1484 } 1485 1486 static void sk_prot_free(struct proto *prot, struct sock *sk) 1487 { 1488 struct kmem_cache *slab; 1489 struct module *owner; 1490 1491 owner = prot->owner; 1492 slab = prot->slab; 1493 1494 cgroup_sk_free(&sk->sk_cgrp_data); 1495 mem_cgroup_sk_free(sk); 1496 security_sk_free(sk); 1497 if (slab != NULL) 1498 kmem_cache_free(slab, sk); 1499 else 1500 kfree(sk); 1501 module_put(owner); 1502 } 1503 1504 /** 1505 * sk_alloc - All socket objects are allocated here 1506 * @net: the applicable net namespace 1507 * @family: protocol family 1508 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1509 * @prot: struct proto associated with this new sock instance 1510 * @kern: is this to be a kernel socket? 
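 *
 * Illustrative call, roughly what a protocol family's create() hook does
 * (abbreviated sketch, not a verbatim quote of any caller):
 *
 *	sk = sk_alloc(net, PF_INET, GFP_KERNEL, &tcp_prot, kern);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *
 * followed by protocol specific initialisation.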
1511 */ 1512 struct sock *sk_alloc(struct net *net, int family, gfp_t priority, 1513 struct proto *prot, int kern) 1514 { 1515 struct sock *sk; 1516 1517 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); 1518 if (sk) { 1519 sk->sk_family = family; 1520 /* 1521 * See comment in struct sock definition to understand 1522 * why we need sk_prot_creator -acme 1523 */ 1524 sk->sk_prot = sk->sk_prot_creator = prot; 1525 sk->sk_kern_sock = kern; 1526 sock_lock_init(sk); 1527 sk->sk_net_refcnt = kern ? 0 : 1; 1528 if (likely(sk->sk_net_refcnt)) 1529 get_net(net); 1530 sock_net_set(sk, net); 1531 refcount_set(&sk->sk_wmem_alloc, 1); 1532 1533 mem_cgroup_sk_alloc(sk); 1534 cgroup_sk_alloc(&sk->sk_cgrp_data); 1535 sock_update_classid(&sk->sk_cgrp_data); 1536 sock_update_netprioidx(&sk->sk_cgrp_data); 1537 } 1538 1539 return sk; 1540 } 1541 EXPORT_SYMBOL(sk_alloc); 1542 1543 /* Sockets having SOCK_RCU_FREE will call this function after one RCU 1544 * grace period. This is the case for UDP sockets and TCP listeners. 1545 */ 1546 static void __sk_destruct(struct rcu_head *head) 1547 { 1548 struct sock *sk = container_of(head, struct sock, sk_rcu); 1549 struct sk_filter *filter; 1550 1551 if (sk->sk_destruct) 1552 sk->sk_destruct(sk); 1553 1554 filter = rcu_dereference_check(sk->sk_filter, 1555 refcount_read(&sk->sk_wmem_alloc) == 0); 1556 if (filter) { 1557 sk_filter_uncharge(sk, filter); 1558 RCU_INIT_POINTER(sk->sk_filter, NULL); 1559 } 1560 if (rcu_access_pointer(sk->sk_reuseport_cb)) 1561 reuseport_detach_sock(sk); 1562 1563 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); 1564 1565 if (atomic_read(&sk->sk_omem_alloc)) 1566 pr_debug("%s: optmem leakage (%d bytes) detected\n", 1567 __func__, atomic_read(&sk->sk_omem_alloc)); 1568 1569 if (sk->sk_frag.page) { 1570 put_page(sk->sk_frag.page); 1571 sk->sk_frag.page = NULL; 1572 } 1573 1574 if (sk->sk_peer_cred) 1575 put_cred(sk->sk_peer_cred); 1576 put_pid(sk->sk_peer_pid); 1577 if (likely(sk->sk_net_refcnt)) 1578 put_net(sock_net(sk)); 1579 sk_prot_free(sk->sk_prot_creator, sk); 1580 } 1581 1582 void sk_destruct(struct sock *sk) 1583 { 1584 if (sock_flag(sk, SOCK_RCU_FREE)) 1585 call_rcu(&sk->sk_rcu, __sk_destruct); 1586 else 1587 __sk_destruct(&sk->sk_rcu); 1588 } 1589 1590 static void __sk_free(struct sock *sk) 1591 { 1592 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) 1593 sock_diag_broadcast_destroy(sk); 1594 else 1595 sk_destruct(sk); 1596 } 1597 1598 void sk_free(struct sock *sk) 1599 { 1600 /* 1601 * We subtract one from sk_wmem_alloc and can know if 1602 * some packets are still in some tx queue. 
1603 * If not null, sock_wfree() will call __sk_free(sk) later 1604 */ 1605 if (refcount_dec_and_test(&sk->sk_wmem_alloc)) 1606 __sk_free(sk); 1607 } 1608 EXPORT_SYMBOL(sk_free); 1609 1610 static void sk_init_common(struct sock *sk) 1611 { 1612 skb_queue_head_init(&sk->sk_receive_queue); 1613 skb_queue_head_init(&sk->sk_write_queue); 1614 skb_queue_head_init(&sk->sk_error_queue); 1615 1616 rwlock_init(&sk->sk_callback_lock); 1617 lockdep_set_class_and_name(&sk->sk_receive_queue.lock, 1618 af_rlock_keys + sk->sk_family, 1619 af_family_rlock_key_strings[sk->sk_family]); 1620 lockdep_set_class_and_name(&sk->sk_write_queue.lock, 1621 af_wlock_keys + sk->sk_family, 1622 af_family_wlock_key_strings[sk->sk_family]); 1623 lockdep_set_class_and_name(&sk->sk_error_queue.lock, 1624 af_elock_keys + sk->sk_family, 1625 af_family_elock_key_strings[sk->sk_family]); 1626 lockdep_set_class_and_name(&sk->sk_callback_lock, 1627 af_callback_keys + sk->sk_family, 1628 af_family_clock_key_strings[sk->sk_family]); 1629 } 1630 1631 /** 1632 * sk_clone_lock - clone a socket, and lock its clone 1633 * @sk: the socket to clone 1634 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1635 * 1636 * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 1637 */ 1638 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) 1639 { 1640 struct sock *newsk; 1641 bool is_charged = true; 1642 1643 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); 1644 if (newsk != NULL) { 1645 struct sk_filter *filter; 1646 1647 sock_copy(newsk, sk); 1648 1649 /* SANITY */ 1650 if (likely(newsk->sk_net_refcnt)) 1651 get_net(sock_net(newsk)); 1652 sk_node_init(&newsk->sk_node); 1653 sock_lock_init(newsk); 1654 bh_lock_sock(newsk); 1655 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1656 newsk->sk_backlog.len = 0; 1657 1658 atomic_set(&newsk->sk_rmem_alloc, 0); 1659 /* 1660 * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) 1661 */ 1662 refcount_set(&newsk->sk_wmem_alloc, 1); 1663 atomic_set(&newsk->sk_omem_alloc, 0); 1664 sk_init_common(newsk); 1665 1666 newsk->sk_dst_cache = NULL; 1667 newsk->sk_dst_pending_confirm = 0; 1668 newsk->sk_wmem_queued = 0; 1669 newsk->sk_forward_alloc = 0; 1670 atomic_set(&newsk->sk_drops, 0); 1671 newsk->sk_send_head = NULL; 1672 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 1673 1674 sock_reset_flag(newsk, SOCK_DONE); 1675 1676 filter = rcu_dereference_protected(newsk->sk_filter, 1); 1677 if (filter != NULL) 1678 /* though it's an empty new sock, the charging may fail 1679 * if sysctl_optmem_max was changed between creation of 1680 * original socket and cloning 1681 */ 1682 is_charged = sk_filter_charge(newsk, filter); 1683 1684 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { 1685 /* We need to make sure that we don't uncharge the new 1686 * socket if we couldn't charge it in the first place 1687 * as otherwise we uncharge the parent's filter. 
1688 */ 1689 if (!is_charged) 1690 RCU_INIT_POINTER(newsk->sk_filter, NULL); 1691 sk_free_unlock_clone(newsk); 1692 newsk = NULL; 1693 goto out; 1694 } 1695 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); 1696 1697 newsk->sk_err = 0; 1698 newsk->sk_err_soft = 0; 1699 newsk->sk_priority = 0; 1700 newsk->sk_incoming_cpu = raw_smp_processor_id(); 1701 atomic64_set(&newsk->sk_cookie, 0); 1702 1703 mem_cgroup_sk_alloc(newsk); 1704 cgroup_sk_alloc(&newsk->sk_cgrp_data); 1705 1706 /* 1707 * Before updating sk_refcnt, we must commit prior changes to memory 1708 * (Documentation/RCU/rculist_nulls.txt for details) 1709 */ 1710 smp_wmb(); 1711 refcount_set(&newsk->sk_refcnt, 2); 1712 1713 /* 1714 * Increment the counter in the same struct proto as the master 1715 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that 1716 * is the same as sk->sk_prot->socks, as this field was copied 1717 * with memcpy). 1718 * 1719 * This _changes_ the previous behaviour, where 1720 * tcp_create_openreq_child always was incrementing the 1721 * equivalent to tcp_prot->socks (inet_sock_nr), so this have 1722 * to be taken into account in all callers. -acme 1723 */ 1724 sk_refcnt_debug_inc(newsk); 1725 sk_set_socket(newsk, NULL); 1726 newsk->sk_wq = NULL; 1727 1728 if (newsk->sk_prot->sockets_allocated) 1729 sk_sockets_allocated_inc(newsk); 1730 1731 if (sock_needs_netstamp(sk) && 1732 newsk->sk_flags & SK_FLAGS_TIMESTAMP) 1733 net_enable_timestamp(); 1734 } 1735 out: 1736 return newsk; 1737 } 1738 EXPORT_SYMBOL_GPL(sk_clone_lock); 1739 1740 void sk_free_unlock_clone(struct sock *sk) 1741 { 1742 /* It is still raw copy of parent, so invalidate 1743 * destructor and make plain sk_free() */ 1744 sk->sk_destruct = NULL; 1745 bh_unlock_sock(sk); 1746 sk_free(sk); 1747 } 1748 EXPORT_SYMBOL_GPL(sk_free_unlock_clone); 1749 1750 void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1751 { 1752 u32 max_segs = 1; 1753 1754 sk_dst_set(sk, dst); 1755 sk->sk_route_caps = dst->dev->features; 1756 if (sk->sk_route_caps & NETIF_F_GSO) 1757 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1758 sk->sk_route_caps &= ~sk->sk_route_nocaps; 1759 if (sk_can_gso(sk)) { 1760 if (dst->header_len) { 1761 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1762 } else { 1763 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 1764 sk->sk_gso_max_size = dst->dev->gso_max_size; 1765 max_segs = max_t(u32, dst->dev->gso_max_segs, 1); 1766 } 1767 } 1768 sk->sk_gso_max_segs = max_segs; 1769 } 1770 EXPORT_SYMBOL_GPL(sk_setup_caps); 1771 1772 /* 1773 * Simple resource managers for sockets. 1774 */ 1775 1776 1777 /* 1778 * Write buffer destructor automatically called from kfree_skb. 1779 */ 1780 void sock_wfree(struct sk_buff *skb) 1781 { 1782 struct sock *sk = skb->sk; 1783 unsigned int len = skb->truesize; 1784 1785 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { 1786 /* 1787 * Keep a reference on sk_wmem_alloc, this will be released 1788 * after sk_write_space() call 1789 */ 1790 WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); 1791 sk->sk_write_space(sk); 1792 len = 1; 1793 } 1794 /* 1795 * if sk_wmem_alloc reaches 0, we must finish what sk_free() 1796 * could not do because of in-flight packets 1797 */ 1798 if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) 1799 __sk_free(sk); 1800 } 1801 EXPORT_SYMBOL(sock_wfree); 1802 1803 /* This variant of sock_wfree() is used by TCP, 1804 * since it sets SOCK_USE_WRITE_QUEUE. 
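 *
 * (Accounting sketch for the write-side destructors above: sk_wmem_alloc
 *  starts at 1 in sk_alloc(), skb_set_owner_w() adds each skb's truesize,
 *  and the destructors subtract it again.  sk_free() only drops that
 *  initial reference, so whichever path brings sk_wmem_alloc to zero,
 *  sk_free() itself or the destructor of the last in-flight skb, ends up
 *  calling __sk_free().)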
1805 */ 1806 void __sock_wfree(struct sk_buff *skb) 1807 { 1808 struct sock *sk = skb->sk; 1809 1810 if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) 1811 __sk_free(sk); 1812 } 1813 1814 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) 1815 { 1816 skb_orphan(skb); 1817 skb->sk = sk; 1818 #ifdef CONFIG_INET 1819 if (unlikely(!sk_fullsock(sk))) { 1820 skb->destructor = sock_edemux; 1821 sock_hold(sk); 1822 return; 1823 } 1824 #endif 1825 skb->destructor = sock_wfree; 1826 skb_set_hash_from_sk(skb, sk); 1827 /* 1828 * We used to take a refcount on sk, but following operation 1829 * is enough to guarantee sk_free() wont free this sock until 1830 * all in-flight packets are completed 1831 */ 1832 refcount_add(skb->truesize, &sk->sk_wmem_alloc); 1833 } 1834 EXPORT_SYMBOL(skb_set_owner_w); 1835 1836 /* This helper is used by netem, as it can hold packets in its 1837 * delay queue. We want to allow the owner socket to send more 1838 * packets, as if they were already TX completed by a typical driver. 1839 * But we also want to keep skb->sk set because some packet schedulers 1840 * rely on it (sch_fq for example). 1841 */ 1842 void skb_orphan_partial(struct sk_buff *skb) 1843 { 1844 if (skb_is_tcp_pure_ack(skb)) 1845 return; 1846 1847 if (skb->destructor == sock_wfree 1848 #ifdef CONFIG_INET 1849 || skb->destructor == tcp_wfree 1850 #endif 1851 ) { 1852 struct sock *sk = skb->sk; 1853 1854 if (refcount_inc_not_zero(&sk->sk_refcnt)) { 1855 WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); 1856 skb->destructor = sock_efree; 1857 } 1858 } else { 1859 skb_orphan(skb); 1860 } 1861 } 1862 EXPORT_SYMBOL(skb_orphan_partial); 1863 1864 /* 1865 * Read buffer destructor automatically called from kfree_skb. 1866 */ 1867 void sock_rfree(struct sk_buff *skb) 1868 { 1869 struct sock *sk = skb->sk; 1870 unsigned int len = skb->truesize; 1871 1872 atomic_sub(len, &sk->sk_rmem_alloc); 1873 sk_mem_uncharge(sk, len); 1874 } 1875 EXPORT_SYMBOL(sock_rfree); 1876 1877 /* 1878 * Buffer destructor for skbs that are not used directly in read or write 1879 * path, e.g. for error handler skbs. Automatically called from kfree_skb. 1880 */ 1881 void sock_efree(struct sk_buff *skb) 1882 { 1883 sock_put(skb->sk); 1884 } 1885 EXPORT_SYMBOL(sock_efree); 1886 1887 kuid_t sock_i_uid(struct sock *sk) 1888 { 1889 kuid_t uid; 1890 1891 read_lock_bh(&sk->sk_callback_lock); 1892 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; 1893 read_unlock_bh(&sk->sk_callback_lock); 1894 return uid; 1895 } 1896 EXPORT_SYMBOL(sock_i_uid); 1897 1898 unsigned long sock_i_ino(struct sock *sk) 1899 { 1900 unsigned long ino; 1901 1902 read_lock_bh(&sk->sk_callback_lock); 1903 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 1904 read_unlock_bh(&sk->sk_callback_lock); 1905 return ino; 1906 } 1907 EXPORT_SYMBOL(sock_i_ino); 1908 1909 /* 1910 * Allocate a skb from the socket's send buffer. 1911 */ 1912 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, 1913 gfp_t priority) 1914 { 1915 if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1916 struct sk_buff *skb = alloc_skb(size, priority); 1917 if (skb) { 1918 skb_set_owner_w(skb, sk); 1919 return skb; 1920 } 1921 } 1922 return NULL; 1923 } 1924 EXPORT_SYMBOL(sock_wmalloc); 1925 1926 /* 1927 * Allocate a memory block from the socket's option memory buffer. 
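 *
 * The size is charged to sk_omem_alloc and bounded by sysctl_optmem_max;
 * a typical (illustrative) user is per-socket ancillary state such as
 * IP options:
 *
 *	opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 *
 * where the matching sock_kfree_s()/sock_kzfree_s() returns the charge.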
1928 */ 1929 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 1930 { 1931 if ((unsigned int)size <= sysctl_optmem_max && 1932 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 1933 void *mem; 1934 /* First do the add, to avoid the race if kmalloc 1935 * might sleep. 1936 */ 1937 atomic_add(size, &sk->sk_omem_alloc); 1938 mem = kmalloc(size, priority); 1939 if (mem) 1940 return mem; 1941 atomic_sub(size, &sk->sk_omem_alloc); 1942 } 1943 return NULL; 1944 } 1945 EXPORT_SYMBOL(sock_kmalloc); 1946 1947 /* Free an option memory block. Note, we actually want the inline 1948 * here as this allows gcc to detect the nullify and fold away the 1949 * condition entirely. 1950 */ 1951 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size, 1952 const bool nullify) 1953 { 1954 if (WARN_ON_ONCE(!mem)) 1955 return; 1956 if (nullify) 1957 kzfree(mem); 1958 else 1959 kfree(mem); 1960 atomic_sub(size, &sk->sk_omem_alloc); 1961 } 1962 1963 void sock_kfree_s(struct sock *sk, void *mem, int size) 1964 { 1965 __sock_kfree_s(sk, mem, size, false); 1966 } 1967 EXPORT_SYMBOL(sock_kfree_s); 1968 1969 void sock_kzfree_s(struct sock *sk, void *mem, int size) 1970 { 1971 __sock_kfree_s(sk, mem, size, true); 1972 } 1973 EXPORT_SYMBOL(sock_kzfree_s); 1974 1975 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 1976 I think, these locks should be removed for datagram sockets. 1977 */ 1978 static long sock_wait_for_wmem(struct sock *sk, long timeo) 1979 { 1980 DEFINE_WAIT(wait); 1981 1982 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1983 for (;;) { 1984 if (!timeo) 1985 break; 1986 if (signal_pending(current)) 1987 break; 1988 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1989 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1990 if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 1991 break; 1992 if (sk->sk_shutdown & SEND_SHUTDOWN) 1993 break; 1994 if (sk->sk_err) 1995 break; 1996 timeo = schedule_timeout(timeo); 1997 } 1998 finish_wait(sk_sleep(sk), &wait); 1999 return timeo; 2000 } 2001 2002 2003 /* 2004 * Generic send/receive buffer handlers 2005 */ 2006 2007 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 2008 unsigned long data_len, int noblock, 2009 int *errcode, int max_page_order) 2010 { 2011 struct sk_buff *skb; 2012 long timeo; 2013 int err; 2014 2015 timeo = sock_sndtimeo(sk, noblock); 2016 for (;;) { 2017 err = sock_error(sk); 2018 if (err != 0) 2019 goto failure; 2020 2021 err = -EPIPE; 2022 if (sk->sk_shutdown & SEND_SHUTDOWN) 2023 goto failure; 2024 2025 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) 2026 break; 2027 2028 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 2029 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 2030 err = -EAGAIN; 2031 if (!timeo) 2032 goto failure; 2033 if (signal_pending(current)) 2034 goto interrupted; 2035 timeo = sock_wait_for_wmem(sk, timeo); 2036 } 2037 skb = alloc_skb_with_frags(header_len, data_len, max_page_order, 2038 errcode, sk->sk_allocation); 2039 if (skb) 2040 skb_set_owner_w(skb, sk); 2041 return skb; 2042 2043 interrupted: 2044 err = sock_intr_errno(timeo); 2045 failure: 2046 *errcode = err; 2047 return NULL; 2048 } 2049 EXPORT_SYMBOL(sock_alloc_send_pskb); 2050 2051 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 2052 int noblock, int *errcode) 2053 { 2054 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); 2055 } 2056 EXPORT_SYMBOL(sock_alloc_send_skb); 2057 2058 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, 2059 
int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		tsflags = *(u32 *)CMSG_DATA(cmsg);
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);

int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
		   struct sockcm_cookie *sockc)
{
	struct cmsghdr *cmsg;
	int ret;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;
		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
		if (ret)
			return ret;
	}
	return 0;
}
EXPORT_SYMBOL(sock_cmsg_send);

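/* Example (editor's illustrative sketch, not part of the original file):
 * from user space, the SOL_SOCKET control messages handled above are
 * attached to an individual sendmsg() call.  A CAP_NET_ADMIN process
 * could, for instance, set a per-packet mark like this:
 *
 *	struct msghdr msg = { ... };
 *	char cbuf[CMSG_SPACE(sizeof(uint32_t))];
 *	struct cmsghdr *cmsg;
 *	uint32_t mark = 42;
 *
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SO_MARK;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
 *	memcpy(CMSG_DATA(cmsg), &mark, sizeof(mark));
 *	sendmsg(fd, &msg, 0);
 *
 * The mark value (42) and the file descriptor fd are placeholders.
 */
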
static void sk_enter_memory_pressure(struct sock *sk)
{
	if (!sk->sk_prot->enter_memory_pressure)
		return;

	sk->sk_prot->enter_memory_pressure(sk);
}

static void sk_leave_memory_pressure(struct sock *sk)
{
	if (sk->sk_prot->leave_memory_pressure) {
		sk->sk_prot->leave_memory_pressure(sk);
	} else {
		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;

		if (memory_pressure && *memory_pressure)
			*memory_pressure = 0;
	}
}

/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less than or equal to PAGE_SIZE.
 */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
		return true;

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}

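/* Example (editor's illustrative sketch, not part of the original file):
 * the backlog drained by __release_sock() above is filled from a
 * protocol's softirq receive path when the socket is currently owned by
 * a process context.  A typical pattern, with my_do_rcv() standing in
 * for the protocol's real handler (also wired up as ->backlog_rcv in
 * its struct proto), looks roughly like:
 *
 *	bh_lock_sock(sk);
 *	if (!sock_owned_by_user(sk)) {
 *		ret = my_do_rcv(sk, skb);
 *	} else if (sk_add_backlog(sk, skb,
 *				  sk->sk_rcvbuf + sk->sk_sndbuf)) {
 *		ret = -ENOBUFS;		// backlog full, drop
 *		kfree_skb(skb);
 *	}
 *	bh_unlock_sock(sk);
 *
 * The queued skbs are later replayed through sk_backlog_rcv() when the
 * owner calls release_sock() or __sk_flush_backlog().
 */
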
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 * @skb: last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as an optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

/**
 * __sk_mem_raise_allocated - increase memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @amt: pages to allocate
 * @kind: allocation type
 *
 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct proto *prot = sk->sk_prot;
	long allocated = sk_memory_allocated_add(sk, amt);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
			return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	sk_memory_allocated_sub(sk, amt);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_raise_allocated);

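/* Example (editor's illustrative sketch, not part of the original file):
 * a protocol that participates in the global memory accounting above
 * typically charges an skb before queueing it to the receive queue,
 * e.g. in its softirq receive path:
 *
 *	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >
 *	    (unsigned int)sk->sk_rcvbuf)
 *		goto drop;
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		goto drop;		// __sk_mem_schedule() refused
 *	skb_set_owner_r(skb, sk);	// accounts the skb against
 *					// sk_rmem_alloc and sk_forward_alloc
 *	__skb_queue_tail(&sk->sk_receive_queue, skb);
 *	sk->sk_data_ready(sk);
 *
 * sk_rmem_schedule() falls through to __sk_mem_schedule() and
 * __sk_mem_raise_allocated() when sk_forward_alloc does not already
 * cover the charge.
 */
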
/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: allocation type
 *
 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 * rmem allocation. This function assumes that protocols which have
 * memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);

/**
 * __sk_mem_reduce_allocated - reclaim memory_allocated
 * @sk: socket
 * @amount: number of quanta
 *
 * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reduce_allocated);

/**
 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
 * @sk: socket
 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	amount >>= SK_MEM_QUANTUM_SHIFT;
	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
	__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);

int sk_set_peek_off(struct sock *sk, int val)
{
	if (val < 0)
		return -EINVAL;

	sk->sk_peek_off = val;
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);

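/* Example (editor's illustrative sketch, not part of the original file):
 * sk_set_peek_off() backs the SO_PEEK_OFF socket option on families
 * that wire it up in their proto_ops (AF_UNIX, for instance).  From
 * user space it makes MSG_PEEK reads consume a sliding offset instead
 * of always returning data from the head of the queue:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15, offset -> 16
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31, offset -> 32
 *	recv(fd, buf, 32, 0);		// a normal read moves the offset
 *					// back by the bytes consumed
 *
 * fd and buf are placeholders; the offset bookkeeping itself is done by
 * the sk_peek_offset() helpers in net/sock.h.
 */
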
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

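/* Example (editor's illustrative sketch, not part of the original file):
 * a protocol family that does not implement every operation can plug
 * the sock_no_*() stubs straight into its proto_ops table.  The
 * structure below is hypothetical (my_family_ops, MY_AF, my_release,
 * my_sendmsg and my_recvmsg are not real symbols):
 *
 *	static const struct proto_ops my_family_ops = {
 *		.family		= MY_AF,
 *		.owner		= THIS_MODULE,
 *		.release	= my_release,
 *		.bind		= sock_no_bind,
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.getname	= sock_no_getname,
 *		.poll		= datagram_poll,
 *		.ioctl		= sock_no_ioctl,
 *		.listen		= sock_no_listen,
 *		.shutdown	= sock_no_shutdown,
 *		.setsockopt	= sock_no_setsockopt,
 *		.getsockopt	= sock_no_getsockopt,
 *		.sendmsg	= my_sendmsg,
 *		.recvmsg	= my_recvmsg,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *	};
 */
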
static void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

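/* Example (editor's illustrative sketch, not part of the original file):
 * sk_reset_timer()/sk_stop_timer() keep a socket reference for as long
 * as the timer is pending.  A hypothetical protocol retransmit timer
 * (names prefixed "my_" are placeholders) would follow the usual
 * pattern of dropping that reference when the handler runs:
 *
 *	static void my_retrans_timer(unsigned long data)
 *	{
 *		struct sock *sk = (struct sock *)data;
 *
 *		bh_lock_sock(sk);
 *		if (sock_owned_by_user(sk)) {
 *			// owner holds the lock, try again shortly
 *			sk_reset_timer(sk, &my_sk(sk)->retrans_timer,
 *				       jiffies + (HZ / 20));
 *			goto out;
 *		}
 *		my_do_retransmit(sk);
 *	out:
 *		bh_unlock_sock(sk);
 *		sock_put(sk);	// pairs with sock_hold() in sk_reset_timer()
 *	}
 *
 * The timer is armed with sk_reset_timer(sk, timer, expires) and
 * cancelled with sk_stop_timer(sk, timer), e.g. from ->destroy().
 */
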
void sock_init_data(struct socket *sock, struct sock *sk)
{
	sk_init_common(sk);
	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
		sk->sk_uid = SOCK_INODE(sock)->i_uid;
	} else {
		sk->sk_wq = NULL;
		sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
	}

	rwlock_init(&sk->sk_callback_lock);
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = sysctl_net_busy_read;
#endif

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;
	sk->sk_incoming_cpu = -1;
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning: release_cb() might need to release sk ownership,
	 * i.e. call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * won't block.
 *
 * Return false if the fast path is taken:
 *
 *   sk_lock.slock locked, owned = 0, BH disabled
 *
 * Return true if the slow path is taken:
 *
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);

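/* Example (editor's illustrative sketch, not part of the original file):
 * callers of lock_sock_fast() must pass its return value to
 * unlock_sock_fast() so that the matching unlock path (plain spin
 * unlock vs. release_sock()) is taken:
 *
 *	bool slow;
 *
 *	slow = lock_sock_fast(sk);
 *	sk_mem_reclaim(sk);		// short, non-blocking critical section
 *	unlock_sock_fast(sk, slow);
 */
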
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

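/* Example (editor's illustrative sketch, not part of the original file):
 * sock_recv_errqueue() above is what a protocol's recvmsg() typically
 * calls when user space asks for the error queue.  A common user of
 * that path requests TX timestamps and then reads them back:
 *
 *	uint32_t flags = SOF_TIMESTAMPING_TX_SOFTWARE |
 *			 SOF_TIMESTAMPING_SOFTWARE;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
 *	send(fd, data, len, 0);
 *	...
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);	// returns a cmsg carrying the
 *						// sock_extended_err and the
 *						// timestamp for the sent data
 *
 * fd, data, len and msg are placeholders; see
 * Documentation/networking/timestamping.txt for the full protocol.
 */
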
/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and be purged by the
	 * socket destructor.
	 *
	 * Also, we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

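/* Example (editor's illustrative sketch, not part of the original file):
 * the per-protocol "sockets in use" counters read back by
 * sock_prot_inuse_get() (and reported through /proc/net/protocols) are
 * maintained by the protocols themselves, typically from their
 * hash/unhash callbacks.  my_hash()/my_unhash() are hypothetical:
 *
 *	static int my_hash(struct sock *sk)
 *	{
 *		// ... insert sk into the protocol's lookup table ...
 *		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 *		return 0;
 *	}
 *
 *	static void my_unhash(struct sock *sk)
 *	{
 *		// ... remove sk from the lookup table ...
 *		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 *	}
 *
 * See the TCP and UDP hash routines for real callers.
 */
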
static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}

static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   prot->slab_flags, NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}

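/* Example (editor's illustrative sketch, not part of the original file):
 * a protocol module registers its struct proto, usually with a
 * dedicated slab cache, before creating any sockets, and unregisters
 * it on module unload.  "my_proto" and "struct my_sock" are
 * hypothetical:
 *
 *	static struct proto my_proto = {
 *		.name	  = "MYPROTO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct my_sock),
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		int rc = proto_register(&my_proto, 1);	// 1 => allocate slab
 *
 *		if (rc)
 *			return rc;
 *		// ... register proto_ops / packet handlers ...
 *		return 0;
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		// ... tear down in reverse order ...
 *		proto_unregister(&my_proto);
 *	}
 */
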
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	req_prot_cleanup(prot->rsk_prot);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
		proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}


static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

#ifdef CONFIG_NET_RX_BUSY_POLL
bool sk_busy_loop_end(void *p, unsigned long start_time)
{
	struct sock *sk = p;

	return !skb_queue_empty(&sk->sk_receive_queue) ||
	       sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */