1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/net/sunrpc/xprtsock.c 4 * 5 * Client-side transport implementation for sockets. 6 * 7 * TCP callback races fixes (C) 1998 Red Hat 8 * TCP send fixes (C) 1998 Red Hat 9 * TCP NFS related read + write fixes 10 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> 11 * 12 * Rewrite of larges part of the code in order to stabilize TCP stuff. 13 * Fix behaviour when socket buffer is full. 14 * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> 15 * 16 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> 17 * 18 * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. 19 * <gilles.quillard@bull.net> 20 */ 21 22 #include <linux/types.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/module.h> 26 #include <linux/capability.h> 27 #include <linux/pagemap.h> 28 #include <linux/errno.h> 29 #include <linux/socket.h> 30 #include <linux/in.h> 31 #include <linux/net.h> 32 #include <linux/mm.h> 33 #include <linux/un.h> 34 #include <linux/udp.h> 35 #include <linux/tcp.h> 36 #include <linux/sunrpc/clnt.h> 37 #include <linux/sunrpc/addr.h> 38 #include <linux/sunrpc/sched.h> 39 #include <linux/sunrpc/svcsock.h> 40 #include <linux/sunrpc/xprtsock.h> 41 #include <linux/file.h> 42 #ifdef CONFIG_SUNRPC_BACKCHANNEL 43 #include <linux/sunrpc/bc_xprt.h> 44 #endif 45 46 #include <net/sock.h> 47 #include <net/checksum.h> 48 #include <net/udp.h> 49 #include <net/tcp.h> 50 #include <linux/bvec.h> 51 #include <linux/highmem.h> 52 #include <linux/uio.h> 53 #include <linux/sched/mm.h> 54 55 #include <trace/events/sunrpc.h> 56 57 #include "socklib.h" 58 #include "sunrpc.h" 59 60 static void xs_close(struct rpc_xprt *xprt); 61 static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock); 62 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 63 struct socket *sock); 64 65 /* 66 * xprtsock tunables 67 */ 68 static unsigned int 
xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 69 static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; 70 static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; 71 72 static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 73 static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 74 75 #define XS_TCP_LINGER_TO (15U * HZ) 76 static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; 77 78 /* 79 * We can register our own files under /proc/sys/sunrpc by 80 * calling register_sysctl_table() again. The files in that 81 * directory become the union of all files registered there. 82 * 83 * We simply need to make sure that we don't collide with 84 * someone else's file names! 85 */ 86 87 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; 88 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; 89 static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; 90 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; 91 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; 92 93 static struct ctl_table_header *sunrpc_table_header; 94 95 static struct xprt_class xs_local_transport; 96 static struct xprt_class xs_udp_transport; 97 static struct xprt_class xs_tcp_transport; 98 static struct xprt_class xs_bc_tcp_transport; 99 100 /* 101 * FIXME: changing the UDP slot table size should also resize the UDP 102 * socket buffers for existing UDP transports 103 */ 104 static struct ctl_table xs_tunables_table[] = { 105 { 106 .procname = "udp_slot_table_entries", 107 .data = &xprt_udp_slot_table_entries, 108 .maxlen = sizeof(unsigned int), 109 .mode = 0644, 110 .proc_handler = proc_dointvec_minmax, 111 .extra1 = &min_slot_table_size, 112 .extra2 = &max_slot_table_size 113 }, 114 { 115 .procname = "tcp_slot_table_entries", 116 .data = &xprt_tcp_slot_table_entries, 117 .maxlen = sizeof(unsigned int), 118 .mode = 0644, 119 .proc_handler = proc_dointvec_minmax, 120 .extra1 = 
&min_slot_table_size, 121 .extra2 = &max_slot_table_size 122 }, 123 { 124 .procname = "tcp_max_slot_table_entries", 125 .data = &xprt_max_tcp_slot_table_entries, 126 .maxlen = sizeof(unsigned int), 127 .mode = 0644, 128 .proc_handler = proc_dointvec_minmax, 129 .extra1 = &min_slot_table_size, 130 .extra2 = &max_tcp_slot_table_limit 131 }, 132 { 133 .procname = "min_resvport", 134 .data = &xprt_min_resvport, 135 .maxlen = sizeof(unsigned int), 136 .mode = 0644, 137 .proc_handler = proc_dointvec_minmax, 138 .extra1 = &xprt_min_resvport_limit, 139 .extra2 = &xprt_max_resvport_limit 140 }, 141 { 142 .procname = "max_resvport", 143 .data = &xprt_max_resvport, 144 .maxlen = sizeof(unsigned int), 145 .mode = 0644, 146 .proc_handler = proc_dointvec_minmax, 147 .extra1 = &xprt_min_resvport_limit, 148 .extra2 = &xprt_max_resvport_limit 149 }, 150 { 151 .procname = "tcp_fin_timeout", 152 .data = &xs_tcp_fin_timeout, 153 .maxlen = sizeof(xs_tcp_fin_timeout), 154 .mode = 0644, 155 .proc_handler = proc_dointvec_jiffies, 156 }, 157 { }, 158 }; 159 160 static struct ctl_table sunrpc_table[] = { 161 { 162 .procname = "sunrpc", 163 .mode = 0555, 164 .child = xs_tunables_table 165 }, 166 { }, 167 }; 168 169 /* 170 * Wait duration for a reply from the RPC portmapper. 171 */ 172 #define XS_BIND_TO (60U * HZ) 173 174 /* 175 * Delay if a UDP socket connect error occurs. This is most likely some 176 * kind of resource problem on the local host. 177 */ 178 #define XS_UDP_REEST_TO (2U * HZ) 179 180 /* 181 * The reestablish timeout allows clients to delay for a bit before attempting 182 * to reconnect to a server that just dropped our connection. 183 * 184 * We implement an exponential backoff when trying to reestablish a TCP 185 * transport connection with the server. Some servers like to drop a TCP 186 * connection when they are overworked, so we start with a short timeout and 187 * increase over time if the server is down or not responding. 
188 */ 189 #define XS_TCP_INIT_REEST_TO (3U * HZ) 190 191 /* 192 * TCP idle timeout; client drops the transport socket if it is idle 193 * for this long. Note that we also timeout UDP sockets to prevent 194 * holding port numbers when there is no RPC traffic. 195 */ 196 #define XS_IDLE_DISC_TO (5U * 60 * HZ) 197 198 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 199 # undef RPC_DEBUG_DATA 200 # define RPCDBG_FACILITY RPCDBG_TRANS 201 #endif 202 203 #ifdef RPC_DEBUG_DATA 204 static void xs_pktdump(char *msg, u32 *packet, unsigned int count) 205 { 206 u8 *buf = (u8 *) packet; 207 int j; 208 209 dprintk("RPC: %s\n", msg); 210 for (j = 0; j < count && j < 128; j += 4) { 211 if (!(j & 31)) { 212 if (j) 213 dprintk("\n"); 214 dprintk("0x%04x ", j); 215 } 216 dprintk("%02x%02x%02x%02x ", 217 buf[j], buf[j+1], buf[j+2], buf[j+3]); 218 } 219 dprintk("\n"); 220 } 221 #else 222 static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) 223 { 224 /* NOP */ 225 } 226 #endif 227 228 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) 229 { 230 return (struct rpc_xprt *) sk->sk_user_data; 231 } 232 233 static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) 234 { 235 return (struct sockaddr *) &xprt->addr; 236 } 237 238 static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) 239 { 240 return (struct sockaddr_un *) &xprt->addr; 241 } 242 243 static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) 244 { 245 return (struct sockaddr_in *) &xprt->addr; 246 } 247 248 static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) 249 { 250 return (struct sockaddr_in6 *) &xprt->addr; 251 } 252 253 static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) 254 { 255 struct sockaddr *sap = xs_addr(xprt); 256 struct sockaddr_in6 *sin6; 257 struct sockaddr_in *sin; 258 struct sockaddr_un *sun; 259 char buf[128]; 260 261 switch (sap->sa_family) { 262 case AF_LOCAL: 263 sun = xs_addr_un(xprt); 264 strlcpy(buf, sun->sun_path, 
sizeof(buf)); 265 xprt->address_strings[RPC_DISPLAY_ADDR] = 266 kstrdup(buf, GFP_KERNEL); 267 break; 268 case AF_INET: 269 (void)rpc_ntop(sap, buf, sizeof(buf)); 270 xprt->address_strings[RPC_DISPLAY_ADDR] = 271 kstrdup(buf, GFP_KERNEL); 272 sin = xs_addr_in(xprt); 273 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 274 break; 275 case AF_INET6: 276 (void)rpc_ntop(sap, buf, sizeof(buf)); 277 xprt->address_strings[RPC_DISPLAY_ADDR] = 278 kstrdup(buf, GFP_KERNEL); 279 sin6 = xs_addr_in6(xprt); 280 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); 281 break; 282 default: 283 BUG(); 284 } 285 286 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 287 } 288 289 static void xs_format_common_peer_ports(struct rpc_xprt *xprt) 290 { 291 struct sockaddr *sap = xs_addr(xprt); 292 char buf[128]; 293 294 snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); 295 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); 296 297 snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); 298 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); 299 } 300 301 static void xs_format_peer_addresses(struct rpc_xprt *xprt, 302 const char *protocol, 303 const char *netid) 304 { 305 xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; 306 xprt->address_strings[RPC_DISPLAY_NETID] = netid; 307 xs_format_common_peer_addresses(xprt); 308 xs_format_common_peer_ports(xprt); 309 } 310 311 static void xs_update_peer_port(struct rpc_xprt *xprt) 312 { 313 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); 314 kfree(xprt->address_strings[RPC_DISPLAY_PORT]); 315 316 xs_format_common_peer_ports(xprt); 317 } 318 319 static void xs_free_peer_addresses(struct rpc_xprt *xprt) 320 { 321 unsigned int i; 322 323 for (i = 0; i < RPC_DISPLAY_MAX; i++) 324 switch (i) { 325 case RPC_DISPLAY_PROTO: 326 case RPC_DISPLAY_NETID: 327 continue; 328 default: 329 kfree(xprt->address_strings[i]); 330 } 331 } 332 333 static size_t 334 
xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
{
	/*
	 * Make sure pages backing @want bytes of the page section exist.
	 *
	 * Returns @want unchanged when it is zero, when the buffer is not
	 * flagged XDRBUF_SPARSE_PAGES, or when every needed page is present
	 * or could be allocated.  If alloc_page() fails, returns the number
	 * of page-section bytes that lie before the first missing page
	 * (0 if that page is the one containing page_base).
	 */
	size_t i,n;

	if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
		return want;
	/* number of pages spanned by [page_base, page_base + want) */
	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < n; i++) {
		if (buf->pages[i])
			continue;
		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
		if (!buf->pages[i]) {
			/* convert page index to a byte offset */
			i *= PAGE_SIZE;
			return i > buf->page_base ? i - buf->page_base : 0;
		}
	}
	return want;
}

/*
 * Skip @seek bytes of the already-initialised msg iterator, then receive.
 * A positive result is returned with @seek added back, so callers see a
 * position relative to the start of the iterator; zero and negative
 * results are passed through unchanged.
 */
static ssize_t
xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
{
	ssize_t ret;
	if (seek != 0)
		iov_iter_advance(&msg->msg_iter, seek);
	ret = sock_recvmsg(sock, msg, flags);
	return ret > 0 ? ret + seek : ret;
}

/* Receive into a single kvec of @count bytes, resuming at offset @seek. */
static ssize_t
xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
		struct kvec *kvec, size_t count, size_t seek)
{
	iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
	return xs_sock_recvmsg(sock, msg, flags, seek);
}

/* Receive into @nr bio_vecs covering @count bytes, resuming at @seek. */
static ssize_t
xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
		struct bio_vec *bvec, unsigned long nr, size_t count,
		size_t seek)
{
	iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
	return xs_sock_recvmsg(sock, msg, flags, seek);
}

/* Receive up to @count bytes into a discard iterator (data is dropped). */
static ssize_t
xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
		size_t count)
{
	iov_iter_discard(&msg->msg_iter, READ, count);
	return sock_recvmsg(sock, msg, flags);
}

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * Call flush_dcache_page() on each bvec page in a @count byte window,
 * after advancing past @seek rounded down with PAGE_MASK.
 */
static void
xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
{
	struct bvec_iter bi = {
		.bi_size	= count,
	};
	struct bio_vec bv;

	bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
	for_each_bvec(bv, bvec, bi, bi)
		flush_dcache_page(bv.bv_page);
}
#else
static inline void
xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
{
}
#endif

/*
 * Receive up to @count bytes of a record into @buf, walking its head, page
 * and tail sections in order.  @seek is the number of bytes of the record
 * already placed in @buf; sections fully covered by @seek are skipped.
 *
 * On return *@read holds the number of bytes newly accounted for by this
 * call.  The return value is the result of the last receive attempt, or
 * -EMSGSIZE when all three sections were processed without hitting one of
 * the "goto out" conditions.
 */
static ssize_t
xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
		struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
{
	size_t want, seek_init = seek, offset = 0;
	ssize_t ret;

	/* head section */
	want = min_t(size_t, count, buf->head[0].iov_len);
	if (seek < want) {
		ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
		if (ret <= 0)
			goto sock_err;
		offset += ret;
		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
			goto out;
		/* short read: stop here, caller resumes later */
		if (ret != want)
			goto out;
		seek = 0;
	} else {
		seek -= want;
		offset += want;
	}

	/* page section; want may shrink if sparse pages can't be allocated */
	want = xs_alloc_sparse_pages(buf,
			min_t(size_t, count - offset, buf->page_len),
			GFP_KERNEL);
	if (seek < want) {
		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
				xdr_buf_pagecount(buf),
				want + buf->page_base,
				seek + buf->page_base);
		if (ret <= 0)
			goto sock_err;
		xs_flush_bvec(buf->bvec, ret, seek + buf->page_base);
		/* the bvec read was offset by page_base; undo that here */
		ret -= buf->page_base;
		offset += ret;
		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
			goto out;
		if (ret != want)
			goto out;
		seek = 0;
	} else {
		seek -= want;
		offset += want;
	}

	/* tail section */
	want = min_t(size_t, count - offset, buf->tail[0].iov_len);
	if (seek < want) {
		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
		if (ret <= 0)
			goto sock_err;
		offset += ret;
		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
			goto out;
		if (ret != want)
			goto out;
	} else if (offset < seek_init)
		offset = seek_init;
	/* all sections processed without reaching "out" */
	ret = -EMSGSIZE;
out:
	*read = offset - seek_init;
	return ret;
sock_err:
	/* count the unconsumed part of seek so *read comes out right */
	offset += seek;
	goto out;
}

/*
 * On the first fragment of a record (recv.copied == 0), copy the
 * recv.offset bytes starting at recv.xid into the request's head kvec,
 * provided the head is large enough, and set recv.copied to recv.offset.
 * Note that recv.copied is updated even when the memcpy is skipped.
 */
static void
xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
{
	if (!transport->recv.copied) {
		if (buf->head[0].iov_len >= transport->recv.offset)
			memcpy(buf->head[0].iov_base,
					&transport->recv.xid,
					transport->recv.offset);
		transport->recv.copied = transport->recv.offset;
	}
}

/* True if the current fragment header has the last-fragment bit set. */
static bool
xs_read_stream_request_done(struct sock_xprt *transport)
{
	return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
}

/* Flag MSG_EOR in @msg when the current fragment is the last of the record. */
static void
xs_read_stream_check_eor(struct sock_xprt *transport,
		struct msghdr *msg)
{
	if (xs_read_stream_request_done(transport))
		msg->msg_flags |= MSG_EOR;
}

/*
 * Read the remainder of the current fragment into @req's private buffer.
 *
 * Returns the number of bytes read on success or truncation, 0 if the
 * fragment had nothing left to read, -ESHUTDOWN if the socket returned 0,
 * or the negative error from the receive.  -EFAULT and -EMSGSIZE are
 * converted into MSG_TRUNC on @msg rather than being returned.
 */
static ssize_t
xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
		int flags, struct rpc_rqst *req)
{
	struct xdr_buf *buf = &req->rq_private_buf;
	size_t want, read;
	ssize_t ret;

	xs_read_header(transport, buf);

	want = transport->recv.len - transport->recv.offset;
	if (want != 0) {
		ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
				transport->recv.copied + want,
				transport->recv.copied,
				&read);
		transport->recv.offset += read;
		transport->recv.copied += read;
	}

	if (transport->recv.offset == transport->recv.len)
		xs_read_stream_check_eor(transport, msg);

	/* ret and read are only valid below because want != 0 */
	if (want == 0)
		return 0;

	switch (ret) {
	default:
		break;
	case -EFAULT:
	case -EMSGSIZE:
		msg->msg_flags |= MSG_TRUNC;
		return read;
	case 0:
		return -ESHUTDOWN;
	}
	return ret < 0 ? ret : read;
}

/*
 * Number of header bytes to read at the start of a fragment: only the
 * fragment marker for a continuation fragment (@isfrag), otherwise three
 * 32-bit words.
 */
static size_t
xs_read_stream_headersize(bool isfrag)
{
	if (isfrag)
		return sizeof(__be32);
	return 3 * sizeof(__be32);
}

/*
 * Receive @want header bytes into the receive state starting at
 * recv.fraghdr, resuming at byte offset @seek.
 */
static ssize_t
xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
		int flags, size_t want, size_t seek)
{
	struct kvec kvec = {
		.iov_base = &transport->recv.fraghdr,
		.iov_len = want,
	};
	return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
}

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
 * Handle an incoming RPC call (backchannel request).  Returns -ESHUTDOWN
 * if the transport has no backchannel service, if no request slot can be
 * found for the XID, or if a continuation fragment arrives for a request
 * whose buffer length is still zero.
 */
static ssize_t
xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
{
	struct rpc_xprt *xprt = &transport->xprt;
	struct rpc_rqst *req;
	ssize_t ret;

	/* Is this transport associated with the backchannel? */
	if (!xprt->bc_serv)
		return -ESHUTDOWN;

	/* Look up and lock the request corresponding to the given XID */
	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
	if (!req) {
		printk(KERN_WARNING "Callback slot table overflowed\n");
		return -ESHUTDOWN;
	}
	if (transport->recv.copied && !req->rq_private_buf.len)
		return -ESHUTDOWN;

	ret = xs_read_stream_request(transport, msg, flags, req);
	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
		xprt_complete_bc_request(req, transport->recv.copied);
	else
		/* record progress so the next fragment can continue */
		req->rq_private_buf.len = transport->recv.copied;

	return ret;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
static ssize_t
xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
{
	return -ESHUTDOWN;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */

/*
 * Handle an incoming RPC reply.  If no request matches the XID, or a
 * continuation fragment arrives for a request with zero buffer length,
 * MSG_TRUNC is set on @msg and 0 is returned.
 */
static ssize_t
xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
{
	struct rpc_xprt *xprt = &transport->xprt;
	struct rpc_rqst *req;
	ssize_t ret = 0;

	/* Look up and lock the request corresponding to the given XID */
	spin_lock(&xprt->queue_lock);
	req = xprt_lookup_rqst(xprt, transport->recv.xid);
	if (!req || (transport->recv.copied && !req->rq_private_buf.len)) {
		msg->msg_flags |= MSG_TRUNC;
		goto out;
	}
	/* pin the request, then drop queue_lock across the socket read */
	xprt_pin_rqst(req);
	spin_unlock(&xprt->queue_lock);

	ret = xs_read_stream_request(transport, msg, flags, req);

	spin_lock(&xprt->queue_lock);
	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
		xprt_complete_rqst(req->rq_task, transport->recv.copied);
	else
		req->rq_private_buf.len = transport->recv.copied;
	xprt_unpin_rqst(req);
out:
	spin_unlock(&xprt->queue_lock);
	return ret;
}

/*
 * Make one pass of the stream receive state machine: read the fragment
 * header if a new fragment is starting, dispatch on call direction, and
 * discard the rest of the fragment if the message was truncated.
 *
 * Returns the number of bytes consumed from the socket, or a negative
 * errno.  A zero-byte receive is reported as -ESHUTDOWN.
 */
static ssize_t
xs_read_stream(struct sock_xprt *transport, int flags)
{
	struct msghdr msg = { 0 };
	size_t want, read = 0;
	ssize_t ret = 0;

	/* recv.len == 0 means we are at the start of a fragment */
	if (transport->recv.len == 0) {
		want = xs_read_stream_headersize(transport->recv.copied != 0);
		ret = xs_read_stream_header(transport, &msg, flags, want,
				transport->recv.offset);
		if (ret <= 0)
			goto out_err;
		transport->recv.offset = ret;
		/* partial header: come back when more data arrives */
		if (transport->recv.offset != want)
			return transport->recv.offset;
		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
			RPC_FRAGMENT_SIZE_MASK;
		/* the marker itself is not counted in the fragment length */
		transport->recv.offset -= sizeof(transport->recv.fraghdr);
		read = ret;
	}

	switch (be32_to_cpu(transport->recv.calldir)) {
	default:
		msg.msg_flags |= MSG_TRUNC;
		break;
	case RPC_CALL:
		ret = xs_read_stream_call(transport, &msg, flags);
		break;
	case RPC_REPLY:
		ret = xs_read_stream_reply(transport, &msg, flags);
	}
	if (msg.msg_flags & MSG_TRUNC) {
		/*
		 * Invalidate calldir so later passes over this record take
		 * the default (discard) branch above.
		 */
		transport->recv.calldir = cpu_to_be32(-1);
		transport->recv.copied = -1;
	}
	if (ret < 0)
		goto out_err;
	read += ret;
	if (transport->recv.offset < transport->recv.len) {
		if (!(msg.msg_flags & MSG_TRUNC))
			return read;
		/* truncated: throw away what is left of this fragment */
		msg.msg_flags = 0;
		ret = xs_read_discard(transport->sock, &msg, flags,
				transport->recv.len - transport->recv.offset);
		if (ret <= 0)
			goto out_err;
		transport->recv.offset += ret;
		read += ret;
		if (transport->recv.offset != transport->recv.len)
			return read;
	}
	/* fragment complete; reset per-record state after the last one */
	if (xs_read_stream_request_done(transport)) {
		trace_xs_stream_read_request(transport);
		transport->recv.copied = 0;
	}
	transport->recv.offset = 0;
	transport->recv.len = 0;
	return read;
out_err:
	return ret != 0 ? ret : -ESHUTDOWN;
}

/* Poll the transport socket with a NULL poll table. */
static __poll_t xs_poll_socket(struct sock_xprt *transport)
{
	return transport->sock->ops->poll(transport->file, transport->sock,
			NULL);
}

/* True if the socket reports readable data and no EPOLLRDHUP. */
static bool xs_poll_socket_readable(struct sock_xprt *transport)
{
	__poll_t events = xs_poll_socket(transport);

	return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
}

/*
 * Clear XPRT_SOCK_DATA_READY, then re-check the socket.  If it is still
 * readable and the bit was not set again in the meantime, set it and
 * requeue the receive worker.
 */
static void xs_poll_check_readable(struct sock_xprt *transport)
{

	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
	if (!xs_poll_socket_readable(transport))
		return;
	if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
		queue_work(xprtiod_workqueue, &transport->recv_worker);
}

/*
 * Drain the socket under recv_mutex by calling xs_read_stream() until it
 * returns a negative value.  -ESHUTDOWN shuts the socket down in both
 * directions; any other result re-arms the receive worker through
 * xs_poll_check_readable().
 */
static void xs_stream_data_receive(struct sock_xprt *transport)
{
	size_t read = 0;
	ssize_t ret = 0;

	mutex_lock(&transport->recv_mutex);
	if (transport->sock == NULL)
		goto out;
	for (;;) {
		ret = xs_read_stream(transport, MSG_DONTWAIT);
		if (ret < 0)
			break;
		read += ret;
		cond_resched();
	}
	if (ret == -ESHUTDOWN)
		kernel_sock_shutdown(transport->sock, SHUT_RDWR);
	else
		xs_poll_check_readable(transport);
out:
	mutex_unlock(&transport->recv_mutex);
	trace_xs_stream_read_data(&transport->xprt, ret, read);
}

/*
 * Work item wrapper: runs the stream receive between
 * memalloc_nofs_save() and memalloc_nofs_restore().
 */
static void xs_stream_data_receive_workfn(struct work_struct *work)
{
	struct sock_xprt *transport =
		container_of(work, struct sock_xprt, recv_worker);
	unsigned int pflags = memalloc_nofs_save();

	xs_stream_data_receive(transport);
	memalloc_nofs_restore(pflags);
}

/* Reset the stream receive and transmit bookkeeping to "start of record". */
static void
xs_stream_reset_connect(struct sock_xprt *transport)
{
	transport->recv.offset = 0;
	transport->recv.len = 0;
	transport->recv.copied = 0;
	transport->xmit.offset = 0;
}

/* Update the connect statistics: bump the count and stamp the start time. */
static void
xs_stream_start_connect(struct sock_xprt *transport)
{
	transport->xprt.stat.connect_count++;
	transport->xprt.stat.connect_start = jiffies;
}

#define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)

/**
 * xs_nospace - handle transmit was incomplete
 * @req: pointer to RPC request
 * @transport: pointer to struct sock_xprt
 *
 * Returns -EAGAIN after arranging to wait for buffer space if the
 * transport is still connected, or -ENOTCONN otherwise.
 */
static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
	struct rpc_xprt *xprt = &transport->xprt;
	struct sock *sk = transport->inet;
	int ret = -EAGAIN;

	trace_rpc_socket_nospace(req, transport);

	/* Protect against races with write_space */
	spin_lock(&xprt->transport_lock);

	/* Don't race with disconnect */
	if (xprt_connected(xprt)) {
		/* wait for more buffer space */
		set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk->sk_write_pending++;
		xprt_wait_for_buffer_space(xprt);
	} else
		ret = -ENOTCONN;

	spin_unlock(&xprt->transport_lock);
	return ret;
}

/*
 * Under the socket lock, call xs_nospace() only if sock_writeable() says
 * the socket is not writeable; otherwise return -EAGAIN.
 */
static int xs_sock_nospace(struct rpc_rqst *req)
{
	struct sock_xprt *transport =
		container_of(req->rq_xprt, struct sock_xprt, xprt);
	struct sock *sk = transport->inet;
	int ret = -EAGAIN;

	lock_sock(sk);
	if (!sock_writeable(sk))
		ret = xs_nospace(req, transport);
	release_sock(sk);
	return ret;
}

/*
 * Same as xs_sock_nospace(), but the check is sk_stream_memory_free().
 */
static int xs_stream_nospace(struct rpc_rqst *req)
{
	struct sock_xprt *transport =
		container_of(req->rq_xprt, struct sock_xprt, xprt);
	struct sock *sk = transport->inet;
	int ret = -EAGAIN;

	lock_sock(sk);
	if (!sk_stream_memory_free(sk))
		ret = xs_nospace(req, transport);
	release_sock(sk);
	return ret;
}

/*
 * Free and re-allocate the bio_vec array of the request's receive buffer.
 * The result of xdr_alloc_bvec() is stored in the task's tk_status.
 */
static void
xs_stream_prepare_request(struct rpc_rqst *req)
{
	xdr_free_bvec(&req->rq_rcv_buf);
	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
}

/*
 * Determine if the previous message in the stream was aborted before it
 * could complete transmission.
 */
static bool
xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
{
	/* stream is mid-record, but this request has sent nothing yet */
	return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
}

/*
 * Return the stream record marker field for a record of length < 2^31-1
 * (zero when the buffer is empty).
 */
static rpc_fraghdr
xs_stream_record_marker(struct xdr_buf *xdr)
{
	if (!xdr->len)
		return 0;
	return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
}

/**
 * xs_local_send_request - write an RPC request to an AF_LOCAL socket
 * @req: pointer to RPC request
 *
 * Return values:
 *        0:	The request has been sent
 *   EAGAIN:	The socket was blocked, please call again later to
 *		complete the request
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
 */
static int xs_local_send_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct sock_xprt *transport =
				container_of(xprt, struct sock_xprt, xprt);
	struct xdr_buf *xdr = &req->rq_snd_buf;
	rpc_fraghdr rm = xs_stream_record_marker(xdr);
	/* total bytes on the wire: payload plus the marker, if there is one */
	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
	struct msghdr msg = {
		.msg_flags	= XS_SENDMSG_FLAGS,
	};
	unsigned int sent;
	int status;

	/* Close the stream if the previous transmission was incomplete */
	if (xs_send_request_was_aborted(transport, req)) {
		xs_close(xprt);
		return -ENOTCONN;
	}

	xs_pktdump("packet data:",
			req->rq_svec->iov_base, req->rq_svec->iov_len);

	req->rq_xtime = ktime_get();
	status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
				   transport->xmit.offset, rm, &sent);
	dprintk("RPC: %s(%u) = %d\n",
			__func__, xdr->len - transport->xmit.offset, status);

	/* -EAGAIN on a writeable socket is reported as -ENOBUFS instead */
	if (status == -EAGAIN && sock_writeable(transport->inet))
		status = -ENOBUFS;

	if (likely(sent > 0) || status == 0) {
		transport->xmit.offset += sent;
		req->rq_bytes_sent = transport->xmit.offset;
		if (likely(req->rq_bytes_sent >= msglen)) {
			req->rq_xmit_bytes_sent += transport->xmit.offset;
			transport->xmit.offset = 0;
			return 0;
		}
		/* partial send: ask to be called again */
		status = -EAGAIN;
	}

	switch (status) {
	case -ENOBUFS:
		break;
	case -EAGAIN:
		status = xs_stream_nospace(req);
		break;
	default:
		dprintk("RPC: sendmsg returned unrecognized error %d\n",
			-status);
		fallthrough;
	case -EPIPE:
		xs_close(xprt);
		status = -ENOTCONN;
	}

	return status;
}

/**
 * xs_udp_send_request - write an RPC request to a UDP socket
 * @req: pointer to RPC request
 *
 * Return values:
 *        0:	The request has been sent
 *   EAGAIN:	The socket was blocked, please call again later to
 *		complete the request
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
 */
static int xs_udp_send_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
	struct xdr_buf *xdr = &req->rq_snd_buf;
	struct msghdr msg = {
		.msg_name	= xs_addr(xprt),
		.msg_namelen	= xprt->addrlen,
		.msg_flags	= XS_SENDMSG_FLAGS,
	};
	unsigned int sent;
	int status;

	xs_pktdump("packet data:",
				req->rq_svec->iov_base,
				req->rq_svec->iov_len);

	if (!xprt_bound(xprt))
		return -ENOTCONN;

	if (!xprt_request_get_cong(xprt, req))
		return -EBADSLT;

	req->rq_xtime = ktime_get();
	status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);

	dprintk("RPC: xs_udp_send_request(%u) = %d\n",
			xdr->len, status);

	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
	if (status == -EPERM)
		goto process_status;

	if (status == -EAGAIN && sock_writeable(transport->inet))
		status = -ENOBUFS;

	if (sent > 0 || status == 0) {
		req->rq_xmit_bytes_sent += sent;
		if (sent >= req->rq_slen)
			return 0;
		/* Still some bytes left; set up for a retry later. */
		status = -EAGAIN;
	}

process_status:
	switch (status) {
	case -ENOTSOCK:
		status = -ENOTCONN;
		/* Should we call xs_close() here? */
		break;
	case -EAGAIN:
		status = xs_sock_nospace(req);
		break;
	case -ENETUNREACH:
	case -ENOBUFS:
	case -EPIPE:
	case -ECONNREFUSED:
	case -EPERM:
		/* When the server has died, an ICMP port unreachable message
		 * prompts ECONNREFUSED. */
		break;
	default:
		dprintk("RPC: sendmsg returned unrecognized error %d\n",
			-status);
	}

	return status;
}

/**
 * xs_tcp_send_request - write an RPC request to a TCP socket
 * @req: pointer to RPC request
 *
 * Return values:
 *        0:	The request has been sent
 *   EAGAIN:	The socket was blocked, please call again later to
 *		complete the request
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
 *
 * XXX: In the case of soft timeouts, should we eventually give up
 *	if sendmsg is not able to make progress?
 */
static int xs_tcp_send_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
	struct xdr_buf *xdr = &req->rq_snd_buf;
	rpc_fraghdr rm = xs_stream_record_marker(xdr);
	/* total bytes on the wire: payload plus the marker, if there is one */
	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
	struct msghdr msg = {
		.msg_flags	= XS_SENDMSG_FLAGS,
	};
	/* set after one zero-progress -EAGAIN on a socket without NOSPACE */
	bool vm_wait = false;
	unsigned int sent;
	int status;

	/* Close the stream if the previous transmission was incomplete */
	if (xs_send_request_was_aborted(transport, req)) {
		if (transport->sock != NULL)
			kernel_sock_shutdown(transport->sock, SHUT_RDWR);
		return -ENOTCONN;
	}
	if (!transport->inet)
		return -ENOTCONN;

	xs_pktdump("packet data:",
				req->rq_svec->iov_base,
				req->rq_svec->iov_len);

	if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
		xs_tcp_set_socket_timeouts(xprt, transport->sock);

	xs_set_srcport(transport, transport->sock);

	/* Continue transmitting the packet/record. We must be careful
	 * to cope with writespace callbacks arriving _after_ we have
	 * called sendmsg(). */
	req->rq_xtime = ktime_get();
	tcp_sock_set_cork(transport->inet, true);
	while (1) {
		status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
					   transport->xmit.offset, rm, &sent);

		dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
				xdr->len - transport->xmit.offset, status);

		/* If we've sent the entire packet, immediately
		 * reset the count of bytes sent. */
		transport->xmit.offset += sent;
		req->rq_bytes_sent = transport->xmit.offset;
		if (likely(req->rq_bytes_sent >= msglen)) {
			req->rq_xmit_bytes_sent += transport->xmit.offset;
			transport->xmit.offset = 0;
			/* uncork only when this is the sole queued request */
			if (atomic_long_read(&xprt->xmit_queuelen) == 1)
				tcp_sock_set_cork(transport->inet, false);
			return 0;
		}

		WARN_ON_ONCE(sent == 0 && status == 0);

		if (status == -EAGAIN ) {
			/*
			 * Return EAGAIN if we're sure we're hitting the
			 * socket send buffer limits.
			 */
			if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
				break;
			/*
			 * Did we hit a memory allocation failure?
			 */
			if (sent == 0) {
				status = -ENOBUFS;
				if (vm_wait)
					break;
				/* Retry, knowing now that we're below the
				 * socket send buffer limit
				 */
				vm_wait = true;
			}
			continue;
		}
		if (status < 0)
			break;
		vm_wait = false;
	}

	switch (status) {
	case -ENOTSOCK:
		status = -ENOTCONN;
		/* Should we call xs_close() here? */
		break;
	case -EAGAIN:
		status = xs_stream_nospace(req);
		break;
	case -ECONNRESET:
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EADDRINUSE:
	case -ENOBUFS:
	case -EPIPE:
		break;
	default:
		dprintk("RPC: sendmsg returned unrecognized error %d\n",
			-status);
	}

	return status;
}

/* Remember the socket's current callbacks so they can be restored later. */
static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
{
	transport->old_data_ready = sk->sk_data_ready;
	transport->old_state_change = sk->sk_state_change;
	transport->old_write_space = sk->sk_write_space;
	transport->old_error_report = sk->sk_error_report;
}

/* Put back the callbacks recorded by xs_save_old_callbacks(). */
static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
{
	sk->sk_data_ready = transport->old_data_ready;
	sk->sk_state_change = transport->old_state_change;
	sk->sk_write_space = transport->old_write_space;
	sk->sk_error_report = transport->old_error_report;
}

/* Clear the data-ready, wake-up and no-space bits in sock_state. */
static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
{
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);

	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
	clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
	clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
	clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
	clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
}

/* Set bit @nr in sock_state and queue the error worker on xprtiod. */
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
{
	set_bit(nr, &transport->sock_state);
	queue_work(xprtiod_workqueue, &transport->error_worker);
}

/*
 * Bump the connect cookie, then clear the CLOSE_WAIT/CLOSING transport
 * state bits and all socket state flags.  The bit clearing is bracketed
 * by memory barriers.
 */
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
	xprt->connect_cookie++;
	smp_mb__before_atomic();
	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
	clear_bit(XPRT_CLOSING, &xprt->state);
	xs_sock_reset_state_flags(xprt);
	smp_mb__after_atomic();
}
1166 1167 /** 1168 * xs_error_report - callback to handle TCP socket state errors 1169 * @sk: socket 1170 * 1171 * Note: we don't call sock_error() since there may be a rpc_task 1172 * using the socket, and so we don't want to clear sk->sk_err. 1173 */ 1174 static void xs_error_report(struct sock *sk) 1175 { 1176 struct sock_xprt *transport; 1177 struct rpc_xprt *xprt; 1178 1179 if (!(xprt = xprt_from_sock(sk))) 1180 return; 1181 1182 transport = container_of(xprt, struct sock_xprt, xprt); 1183 transport->xprt_err = -sk->sk_err; 1184 if (transport->xprt_err == 0) 1185 return; 1186 dprintk("RPC: xs_error_report client %p, error=%d...\n", 1187 xprt, -transport->xprt_err); 1188 trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err); 1189 1190 /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ 1191 smp_mb__before_atomic(); 1192 xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); 1193 } 1194 1195 static void xs_reset_transport(struct sock_xprt *transport) 1196 { 1197 struct socket *sock = transport->sock; 1198 struct sock *sk = transport->inet; 1199 struct rpc_xprt *xprt = &transport->xprt; 1200 struct file *filp = transport->file; 1201 1202 if (sk == NULL) 1203 return; 1204 1205 if (atomic_read(&transport->xprt.swapper)) 1206 sk_clear_memalloc(sk); 1207 1208 kernel_sock_shutdown(sock, SHUT_RDWR); 1209 1210 mutex_lock(&transport->recv_mutex); 1211 lock_sock(sk); 1212 transport->inet = NULL; 1213 transport->sock = NULL; 1214 transport->file = NULL; 1215 1216 sk->sk_user_data = NULL; 1217 1218 xs_restore_old_callbacks(transport, sk); 1219 xprt_clear_connected(xprt); 1220 xs_sock_reset_connection_flags(xprt); 1221 /* Reset stream record info */ 1222 xs_stream_reset_connect(transport); 1223 release_sock(sk); 1224 mutex_unlock(&transport->recv_mutex); 1225 1226 trace_rpc_socket_close(xprt, sock); 1227 fput(filp); 1228 1229 xprt_disconnect_done(xprt); 1230 } 1231 1232 /** 1233 * xs_close - close a socket 1234 * @xprt: transport 1235 * 1236 * This is 
used when all requests are complete; ie, no DRC state remains 1237 * on the server we want to save. 1238 * 1239 * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with 1240 * xs_reset_transport() zeroing the socket from underneath a writer. 1241 */ 1242 static void xs_close(struct rpc_xprt *xprt) 1243 { 1244 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1245 1246 dprintk("RPC: xs_close xprt %p\n", xprt); 1247 1248 xs_reset_transport(transport); 1249 xprt->reestablish_timeout = 0; 1250 } 1251 1252 static void xs_inject_disconnect(struct rpc_xprt *xprt) 1253 { 1254 dprintk("RPC: injecting transport disconnect on xprt=%p\n", 1255 xprt); 1256 xprt_disconnect_done(xprt); 1257 } 1258 1259 static void xs_xprt_free(struct rpc_xprt *xprt) 1260 { 1261 xs_free_peer_addresses(xprt); 1262 xprt_free(xprt); 1263 } 1264 1265 /** 1266 * xs_destroy - prepare to shutdown a transport 1267 * @xprt: doomed transport 1268 * 1269 */ 1270 static void xs_destroy(struct rpc_xprt *xprt) 1271 { 1272 struct sock_xprt *transport = container_of(xprt, 1273 struct sock_xprt, xprt); 1274 dprintk("RPC: xs_destroy xprt %p\n", xprt); 1275 1276 cancel_delayed_work_sync(&transport->connect_worker); 1277 xs_close(xprt); 1278 cancel_work_sync(&transport->recv_worker); 1279 cancel_work_sync(&transport->error_worker); 1280 xs_xprt_free(xprt); 1281 module_put(THIS_MODULE); 1282 } 1283 1284 /** 1285 * xs_udp_data_read_skb - receive callback for UDP sockets 1286 * @xprt: transport 1287 * @sk: socket 1288 * @skb: skbuff 1289 * 1290 */ 1291 static void xs_udp_data_read_skb(struct rpc_xprt *xprt, 1292 struct sock *sk, 1293 struct sk_buff *skb) 1294 { 1295 struct rpc_task *task; 1296 struct rpc_rqst *rovr; 1297 int repsize, copied; 1298 u32 _xid; 1299 __be32 *xp; 1300 1301 repsize = skb->len; 1302 if (repsize < 4) { 1303 dprintk("RPC: impossible RPC reply size %d!\n", repsize); 1304 return; 1305 } 1306 1307 /* Copy the XID from the skb... 
*/ 1308 xp = skb_header_pointer(skb, 0, sizeof(_xid), &_xid); 1309 if (xp == NULL) 1310 return; 1311 1312 /* Look up and lock the request corresponding to the given XID */ 1313 spin_lock(&xprt->queue_lock); 1314 rovr = xprt_lookup_rqst(xprt, *xp); 1315 if (!rovr) 1316 goto out_unlock; 1317 xprt_pin_rqst(rovr); 1318 xprt_update_rtt(rovr->rq_task); 1319 spin_unlock(&xprt->queue_lock); 1320 task = rovr->rq_task; 1321 1322 if ((copied = rovr->rq_private_buf.buflen) > repsize) 1323 copied = repsize; 1324 1325 /* Suck it into the iovec, verify checksum if not done by hw. */ 1326 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1327 spin_lock(&xprt->queue_lock); 1328 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1329 goto out_unpin; 1330 } 1331 1332 1333 spin_lock(&xprt->transport_lock); 1334 xprt_adjust_cwnd(xprt, task, copied); 1335 spin_unlock(&xprt->transport_lock); 1336 spin_lock(&xprt->queue_lock); 1337 xprt_complete_rqst(task, copied); 1338 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1339 out_unpin: 1340 xprt_unpin_rqst(rovr); 1341 out_unlock: 1342 spin_unlock(&xprt->queue_lock); 1343 } 1344 1345 static void xs_udp_data_receive(struct sock_xprt *transport) 1346 { 1347 struct sk_buff *skb; 1348 struct sock *sk; 1349 int err; 1350 1351 mutex_lock(&transport->recv_mutex); 1352 sk = transport->inet; 1353 if (sk == NULL) 1354 goto out; 1355 for (;;) { 1356 skb = skb_recv_udp(sk, 0, 1, &err); 1357 if (skb == NULL) 1358 break; 1359 xs_udp_data_read_skb(&transport->xprt, sk, skb); 1360 consume_skb(skb); 1361 cond_resched(); 1362 } 1363 xs_poll_check_readable(transport); 1364 out: 1365 mutex_unlock(&transport->recv_mutex); 1366 } 1367 1368 static void xs_udp_data_receive_workfn(struct work_struct *work) 1369 { 1370 struct sock_xprt *transport = 1371 container_of(work, struct sock_xprt, recv_worker); 1372 unsigned int pflags = memalloc_nofs_save(); 1373 1374 xs_udp_data_receive(transport); 1375 memalloc_nofs_restore(pflags); 1376 } 1377 1378 /** 1379 * xs_data_ready - 
"data ready" callback for UDP sockets 1380 * @sk: socket with data to read 1381 * 1382 */ 1383 static void xs_data_ready(struct sock *sk) 1384 { 1385 struct rpc_xprt *xprt; 1386 1387 dprintk("RPC: xs_data_ready...\n"); 1388 xprt = xprt_from_sock(sk); 1389 if (xprt != NULL) { 1390 struct sock_xprt *transport = container_of(xprt, 1391 struct sock_xprt, xprt); 1392 transport->old_data_ready(sk); 1393 /* Any data means we had a useful conversation, so 1394 * then we don't need to delay the next reconnect 1395 */ 1396 if (xprt->reestablish_timeout) 1397 xprt->reestablish_timeout = 0; 1398 if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1399 queue_work(xprtiod_workqueue, &transport->recv_worker); 1400 } 1401 } 1402 1403 /* 1404 * Helper function to force a TCP close if the server is sending 1405 * junk and/or it has put us in CLOSE_WAIT 1406 */ 1407 static void xs_tcp_force_close(struct rpc_xprt *xprt) 1408 { 1409 xprt_force_disconnect(xprt); 1410 } 1411 1412 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1413 static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) 1414 { 1415 return PAGE_SIZE; 1416 } 1417 #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1418 1419 /** 1420 * xs_tcp_state_change - callback to handle TCP socket state changes 1421 * @sk: socket whose state has changed 1422 * 1423 */ 1424 static void xs_tcp_state_change(struct sock *sk) 1425 { 1426 struct rpc_xprt *xprt; 1427 struct sock_xprt *transport; 1428 1429 if (!(xprt = xprt_from_sock(sk))) 1430 return; 1431 dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); 1432 dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", 1433 sk->sk_state, xprt_connected(xprt), 1434 sock_flag(sk, SOCK_DEAD), 1435 sock_flag(sk, SOCK_ZAPPED), 1436 sk->sk_shutdown); 1437 1438 transport = container_of(xprt, struct sock_xprt, xprt); 1439 trace_rpc_socket_state_change(xprt, sk->sk_socket); 1440 switch (sk->sk_state) { 1441 case TCP_ESTABLISHED: 1442 if (!xprt_test_and_set_connected(xprt)) { 1443 
xprt->connect_cookie++; 1444 clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); 1445 xprt_clear_connecting(xprt); 1446 1447 xprt->stat.connect_count++; 1448 xprt->stat.connect_time += (long)jiffies - 1449 xprt->stat.connect_start; 1450 xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING); 1451 } 1452 break; 1453 case TCP_FIN_WAIT1: 1454 /* The client initiated a shutdown of the socket */ 1455 xprt->connect_cookie++; 1456 xprt->reestablish_timeout = 0; 1457 set_bit(XPRT_CLOSING, &xprt->state); 1458 smp_mb__before_atomic(); 1459 clear_bit(XPRT_CONNECTED, &xprt->state); 1460 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1461 smp_mb__after_atomic(); 1462 break; 1463 case TCP_CLOSE_WAIT: 1464 /* The server initiated a shutdown of the socket */ 1465 xprt->connect_cookie++; 1466 clear_bit(XPRT_CONNECTED, &xprt->state); 1467 xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); 1468 fallthrough; 1469 case TCP_CLOSING: 1470 /* 1471 * If the server closed down the connection, make sure that 1472 * we back off before reconnecting 1473 */ 1474 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 1475 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1476 break; 1477 case TCP_LAST_ACK: 1478 set_bit(XPRT_CLOSING, &xprt->state); 1479 smp_mb__before_atomic(); 1480 clear_bit(XPRT_CONNECTED, &xprt->state); 1481 smp_mb__after_atomic(); 1482 break; 1483 case TCP_CLOSE: 1484 if (test_and_clear_bit(XPRT_SOCK_CONNECTING, 1485 &transport->sock_state)) 1486 xprt_clear_connecting(xprt); 1487 clear_bit(XPRT_CLOSING, &xprt->state); 1488 /* Trigger the socket release */ 1489 xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); 1490 } 1491 } 1492 1493 static void xs_write_space(struct sock *sk) 1494 { 1495 struct sock_xprt *transport; 1496 struct rpc_xprt *xprt; 1497 1498 if (!sk->sk_socket) 1499 return; 1500 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1501 1502 if (unlikely(!(xprt = xprt_from_sock(sk)))) 1503 return; 1504 transport = container_of(xprt, struct sock_xprt, 
xprt); 1505 if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state)) 1506 return; 1507 xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); 1508 sk->sk_write_pending--; 1509 } 1510 1511 /** 1512 * xs_udp_write_space - callback invoked when socket buffer space 1513 * becomes available 1514 * @sk: socket whose state has changed 1515 * 1516 * Called when more output buffer space is available for this socket. 1517 * We try not to wake our writers until they can make "significant" 1518 * progress, otherwise we'll waste resources thrashing kernel_sendmsg 1519 * with a bunch of small requests. 1520 */ 1521 static void xs_udp_write_space(struct sock *sk) 1522 { 1523 /* from net/core/sock.c:sock_def_write_space */ 1524 if (sock_writeable(sk)) 1525 xs_write_space(sk); 1526 } 1527 1528 /** 1529 * xs_tcp_write_space - callback invoked when socket buffer space 1530 * becomes available 1531 * @sk: socket whose state has changed 1532 * 1533 * Called when more output buffer space is available for this socket. 1534 * We try not to wake our writers until they can make "significant" 1535 * progress, otherwise we'll waste resources thrashing kernel_sendmsg 1536 * with a bunch of small requests. 
1537 */ 1538 static void xs_tcp_write_space(struct sock *sk) 1539 { 1540 /* from net/core/stream.c:sk_stream_write_space */ 1541 if (sk_stream_is_writeable(sk)) 1542 xs_write_space(sk); 1543 } 1544 1545 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) 1546 { 1547 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1548 struct sock *sk = transport->inet; 1549 1550 if (transport->rcvsize) { 1551 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 1552 sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; 1553 } 1554 if (transport->sndsize) { 1555 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 1556 sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; 1557 sk->sk_write_space(sk); 1558 } 1559 } 1560 1561 /** 1562 * xs_udp_set_buffer_size - set send and receive limits 1563 * @xprt: generic transport 1564 * @sndsize: requested size of send buffer, in bytes 1565 * @rcvsize: requested size of receive buffer, in bytes 1566 * 1567 * Set socket send and receive buffer size limits. 1568 */ 1569 static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) 1570 { 1571 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1572 1573 transport->sndsize = 0; 1574 if (sndsize) 1575 transport->sndsize = sndsize + 1024; 1576 transport->rcvsize = 0; 1577 if (rcvsize) 1578 transport->rcvsize = rcvsize + 1024; 1579 1580 xs_udp_do_set_buffer_size(xprt); 1581 } 1582 1583 /** 1584 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport 1585 * @xprt: controlling transport 1586 * @task: task that timed out 1587 * 1588 * Adjust the congestion window after a retransmit timeout has occurred. 
1589 */ 1590 static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) 1591 { 1592 spin_lock(&xprt->transport_lock); 1593 xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); 1594 spin_unlock(&xprt->transport_lock); 1595 } 1596 1597 static int xs_get_random_port(void) 1598 { 1599 unsigned short min = xprt_min_resvport, max = xprt_max_resvport; 1600 unsigned short range; 1601 unsigned short rand; 1602 1603 if (max < min) 1604 return -EADDRINUSE; 1605 range = max - min + 1; 1606 rand = (unsigned short) prandom_u32() % range; 1607 return rand + min; 1608 } 1609 1610 static unsigned short xs_sock_getport(struct socket *sock) 1611 { 1612 struct sockaddr_storage buf; 1613 unsigned short port = 0; 1614 1615 if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0) 1616 goto out; 1617 switch (buf.ss_family) { 1618 case AF_INET6: 1619 port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port); 1620 break; 1621 case AF_INET: 1622 port = ntohs(((struct sockaddr_in *)&buf)->sin_port); 1623 } 1624 out: 1625 return port; 1626 } 1627 1628 /** 1629 * xs_set_port - reset the port number in the remote endpoint address 1630 * @xprt: generic transport 1631 * @port: new port number 1632 * 1633 */ 1634 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) 1635 { 1636 dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); 1637 1638 rpc_set_port(xs_addr(xprt), port); 1639 xs_update_peer_port(xprt); 1640 } 1641 1642 static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) 1643 { 1644 if (transport->srcport == 0 && transport->xprt.reuseport) 1645 transport->srcport = xs_sock_getport(sock); 1646 } 1647 1648 static int xs_get_srcport(struct sock_xprt *transport) 1649 { 1650 int port = transport->srcport; 1651 1652 if (port == 0 && transport->xprt.resvport) 1653 port = xs_get_random_port(); 1654 return port; 1655 } 1656 1657 unsigned short get_srcport(struct rpc_xprt *xprt) 1658 { 1659 struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); 
1660 unsigned short ret = 0; 1661 mutex_lock(&sock->recv_mutex); 1662 if (sock->sock) 1663 ret = xs_sock_getport(sock->sock); 1664 mutex_unlock(&sock->recv_mutex); 1665 return ret; 1666 } 1667 EXPORT_SYMBOL(get_srcport); 1668 1669 static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) 1670 { 1671 if (transport->srcport != 0) 1672 transport->srcport = 0; 1673 if (!transport->xprt.resvport) 1674 return 0; 1675 if (port <= xprt_min_resvport || port > xprt_max_resvport) 1676 return xprt_max_resvport; 1677 return --port; 1678 } 1679 static int xs_bind(struct sock_xprt *transport, struct socket *sock) 1680 { 1681 struct sockaddr_storage myaddr; 1682 int err, nloop = 0; 1683 int port = xs_get_srcport(transport); 1684 unsigned short last; 1685 1686 /* 1687 * If we are asking for any ephemeral port (i.e. port == 0 && 1688 * transport->xprt.resvport == 0), don't bind. Let the local 1689 * port selection happen implicitly when the socket is used 1690 * (for example at connect time). 1691 * 1692 * This ensures that we can continue to establish TCP 1693 * connections even when all local ephemeral ports are already 1694 * a part of some TCP connection. This makes no difference 1695 * for UDP sockets, but also doesn't harm them. 1696 * 1697 * If we're asking for any reserved port (i.e. port == 0 && 1698 * transport->xprt.resvport == 1) xs_get_srcport above will 1699 * ensure that port is non-zero and we will bind as needed. 
1700 */ 1701 if (port <= 0) 1702 return port; 1703 1704 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); 1705 do { 1706 rpc_set_port((struct sockaddr *)&myaddr, port); 1707 err = kernel_bind(sock, (struct sockaddr *)&myaddr, 1708 transport->xprt.addrlen); 1709 if (err == 0) { 1710 if (transport->xprt.reuseport) 1711 transport->srcport = port; 1712 break; 1713 } 1714 last = port; 1715 port = xs_next_srcport(transport, port); 1716 if (port > last) 1717 nloop++; 1718 } while (err == -EADDRINUSE && nloop != 2); 1719 1720 if (myaddr.ss_family == AF_INET) 1721 dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__, 1722 &((struct sockaddr_in *)&myaddr)->sin_addr, 1723 port, err ? "failed" : "ok", err); 1724 else 1725 dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__, 1726 &((struct sockaddr_in6 *)&myaddr)->sin6_addr, 1727 port, err ? "failed" : "ok", err); 1728 return err; 1729 } 1730 1731 /* 1732 * We don't support autobind on AF_LOCAL sockets 1733 */ 1734 static void xs_local_rpcbind(struct rpc_task *task) 1735 { 1736 xprt_set_bound(task->tk_xprt); 1737 } 1738 1739 static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) 1740 { 1741 } 1742 1743 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1744 static struct lock_class_key xs_key[3]; 1745 static struct lock_class_key xs_slock_key[3]; 1746 1747 static inline void xs_reclassify_socketu(struct socket *sock) 1748 { 1749 struct sock *sk = sock->sk; 1750 1751 sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", 1752 &xs_slock_key[0], "sk_lock-AF_LOCAL-RPC", &xs_key[0]); 1753 } 1754 1755 static inline void xs_reclassify_socket4(struct socket *sock) 1756 { 1757 struct sock *sk = sock->sk; 1758 1759 sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", 1760 &xs_slock_key[1], "sk_lock-AF_INET-RPC", &xs_key[1]); 1761 } 1762 1763 static inline void xs_reclassify_socket6(struct socket *sock) 1764 { 1765 struct sock *sk = sock->sk; 1766 1767 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", 1768 &xs_slock_key[2], 
"sk_lock-AF_INET6-RPC", &xs_key[2]); 1769 } 1770 1771 static inline void xs_reclassify_socket(int family, struct socket *sock) 1772 { 1773 if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk))) 1774 return; 1775 1776 switch (family) { 1777 case AF_LOCAL: 1778 xs_reclassify_socketu(sock); 1779 break; 1780 case AF_INET: 1781 xs_reclassify_socket4(sock); 1782 break; 1783 case AF_INET6: 1784 xs_reclassify_socket6(sock); 1785 break; 1786 } 1787 } 1788 #else 1789 static inline void xs_reclassify_socket(int family, struct socket *sock) 1790 { 1791 } 1792 #endif 1793 1794 static void xs_dummy_setup_socket(struct work_struct *work) 1795 { 1796 } 1797 1798 static struct socket *xs_create_sock(struct rpc_xprt *xprt, 1799 struct sock_xprt *transport, int family, int type, 1800 int protocol, bool reuseport) 1801 { 1802 struct file *filp; 1803 struct socket *sock; 1804 int err; 1805 1806 err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1); 1807 if (err < 0) { 1808 dprintk("RPC: can't create %d transport socket (%d).\n", 1809 protocol, -err); 1810 goto out; 1811 } 1812 xs_reclassify_socket(family, sock); 1813 1814 if (reuseport) 1815 sock_set_reuseport(sock->sk); 1816 1817 err = xs_bind(transport, sock); 1818 if (err) { 1819 sock_release(sock); 1820 goto out; 1821 } 1822 1823 filp = sock_alloc_file(sock, O_NONBLOCK, NULL); 1824 if (IS_ERR(filp)) 1825 return ERR_CAST(filp); 1826 transport->file = filp; 1827 1828 return sock; 1829 out: 1830 return ERR_PTR(err); 1831 } 1832 1833 static int xs_local_finish_connecting(struct rpc_xprt *xprt, 1834 struct socket *sock) 1835 { 1836 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 1837 xprt); 1838 1839 if (!transport->inet) { 1840 struct sock *sk = sock->sk; 1841 1842 lock_sock(sk); 1843 1844 xs_save_old_callbacks(transport, sk); 1845 1846 sk->sk_user_data = xprt; 1847 sk->sk_data_ready = xs_data_ready; 1848 sk->sk_write_space = xs_udp_write_space; 1849 sk->sk_error_report = xs_error_report; 1850 1851 
xprt_clear_connected(xprt); 1852 1853 /* Reset to new socket */ 1854 transport->sock = sock; 1855 transport->inet = sk; 1856 1857 release_sock(sk); 1858 } 1859 1860 xs_stream_start_connect(transport); 1861 1862 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); 1863 } 1864 1865 /** 1866 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint 1867 * @transport: socket transport to connect 1868 */ 1869 static int xs_local_setup_socket(struct sock_xprt *transport) 1870 { 1871 struct rpc_xprt *xprt = &transport->xprt; 1872 struct file *filp; 1873 struct socket *sock; 1874 int status; 1875 1876 status = __sock_create(xprt->xprt_net, AF_LOCAL, 1877 SOCK_STREAM, 0, &sock, 1); 1878 if (status < 0) { 1879 dprintk("RPC: can't create AF_LOCAL " 1880 "transport socket (%d).\n", -status); 1881 goto out; 1882 } 1883 xs_reclassify_socket(AF_LOCAL, sock); 1884 1885 filp = sock_alloc_file(sock, O_NONBLOCK, NULL); 1886 if (IS_ERR(filp)) { 1887 status = PTR_ERR(filp); 1888 goto out; 1889 } 1890 transport->file = filp; 1891 1892 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", 1893 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1894 1895 status = xs_local_finish_connecting(xprt, sock); 1896 trace_rpc_socket_connect(xprt, sock, status); 1897 switch (status) { 1898 case 0: 1899 dprintk("RPC: xprt %p connected to %s\n", 1900 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1901 xprt->stat.connect_count++; 1902 xprt->stat.connect_time += (long)jiffies - 1903 xprt->stat.connect_start; 1904 xprt_set_connected(xprt); 1905 break; 1906 case -ENOBUFS: 1907 break; 1908 case -ENOENT: 1909 dprintk("RPC: xprt %p: socket %s does not exist\n", 1910 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1911 break; 1912 case -ECONNREFUSED: 1913 dprintk("RPC: xprt %p: connection refused for %s\n", 1914 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1915 break; 1916 default: 1917 printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", 1918 __func__, 
-status, 1919 xprt->address_strings[RPC_DISPLAY_ADDR]); 1920 } 1921 1922 out: 1923 xprt_clear_connecting(xprt); 1924 xprt_wake_pending_tasks(xprt, status); 1925 return status; 1926 } 1927 1928 static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) 1929 { 1930 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1931 int ret; 1932 1933 if (RPC_IS_ASYNC(task)) { 1934 /* 1935 * We want the AF_LOCAL connect to be resolved in the 1936 * filesystem namespace of the process making the rpc 1937 * call. Thus we connect synchronously. 1938 * 1939 * If we want to support asynchronous AF_LOCAL calls, 1940 * we'll need to figure out how to pass a namespace to 1941 * connect. 1942 */ 1943 task->tk_rpc_status = -ENOTCONN; 1944 rpc_exit(task, -ENOTCONN); 1945 return; 1946 } 1947 ret = xs_local_setup_socket(transport); 1948 if (ret && !RPC_IS_SOFTCONN(task)) 1949 msleep_interruptible(15000); 1950 } 1951 1952 #if IS_ENABLED(CONFIG_SUNRPC_SWAP) 1953 /* 1954 * Note that this should be called with XPRT_LOCKED held, or recv_mutex 1955 * held, or when we otherwise know that we have exclusive access to the 1956 * socket, to guard against races with xs_reset_transport. 1957 */ 1958 static void xs_set_memalloc(struct rpc_xprt *xprt) 1959 { 1960 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 1961 xprt); 1962 1963 /* 1964 * If there's no sock, then we have nothing to set. The 1965 * reconnecting process will get it for us. 1966 */ 1967 if (!transport->inet) 1968 return; 1969 if (atomic_read(&xprt->swapper)) 1970 sk_set_memalloc(transport->inet); 1971 } 1972 1973 /** 1974 * xs_enable_swap - Tag this transport as being used for swap. 1975 * @xprt: transport to tag 1976 * 1977 * Take a reference to this transport on behalf of the rpc_clnt, and 1978 * optionally mark it for swapping if it wasn't already. 
1979 */ 1980 static int 1981 xs_enable_swap(struct rpc_xprt *xprt) 1982 { 1983 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); 1984 1985 mutex_lock(&xs->recv_mutex); 1986 if (atomic_inc_return(&xprt->swapper) == 1 && 1987 xs->inet) 1988 sk_set_memalloc(xs->inet); 1989 mutex_unlock(&xs->recv_mutex); 1990 return 0; 1991 } 1992 1993 /** 1994 * xs_disable_swap - Untag this transport as being used for swap. 1995 * @xprt: transport to tag 1996 * 1997 * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the 1998 * swapper refcount goes to 0, untag the socket as a memalloc socket. 1999 */ 2000 static void 2001 xs_disable_swap(struct rpc_xprt *xprt) 2002 { 2003 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); 2004 2005 mutex_lock(&xs->recv_mutex); 2006 if (atomic_dec_and_test(&xprt->swapper) && 2007 xs->inet) 2008 sk_clear_memalloc(xs->inet); 2009 mutex_unlock(&xs->recv_mutex); 2010 } 2011 #else 2012 static void xs_set_memalloc(struct rpc_xprt *xprt) 2013 { 2014 } 2015 2016 static int 2017 xs_enable_swap(struct rpc_xprt *xprt) 2018 { 2019 return -EINVAL; 2020 } 2021 2022 static void 2023 xs_disable_swap(struct rpc_xprt *xprt) 2024 { 2025 } 2026 #endif 2027 2028 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2029 { 2030 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2031 2032 if (!transport->inet) { 2033 struct sock *sk = sock->sk; 2034 2035 lock_sock(sk); 2036 2037 xs_save_old_callbacks(transport, sk); 2038 2039 sk->sk_user_data = xprt; 2040 sk->sk_data_ready = xs_data_ready; 2041 sk->sk_write_space = xs_udp_write_space; 2042 2043 xprt_set_connected(xprt); 2044 2045 /* Reset to new socket */ 2046 transport->sock = sock; 2047 transport->inet = sk; 2048 2049 xs_set_memalloc(xprt); 2050 2051 release_sock(sk); 2052 } 2053 xs_udp_do_set_buffer_size(xprt); 2054 2055 xprt->stat.connect_start = jiffies; 2056 } 2057 2058 static void xs_udp_setup_socket(struct 
work_struct *work) 2059 { 2060 struct sock_xprt *transport = 2061 container_of(work, struct sock_xprt, connect_worker.work); 2062 struct rpc_xprt *xprt = &transport->xprt; 2063 struct socket *sock; 2064 int status = -EIO; 2065 unsigned int pflags = current->flags; 2066 2067 if (atomic_read(&xprt->swapper)) 2068 current->flags |= PF_MEMALLOC; 2069 sock = xs_create_sock(xprt, transport, 2070 xs_addr(xprt)->sa_family, SOCK_DGRAM, 2071 IPPROTO_UDP, false); 2072 if (IS_ERR(sock)) 2073 goto out; 2074 2075 dprintk("RPC: worker connecting xprt %p via %s to " 2076 "%s (port %s)\n", xprt, 2077 xprt->address_strings[RPC_DISPLAY_PROTO], 2078 xprt->address_strings[RPC_DISPLAY_ADDR], 2079 xprt->address_strings[RPC_DISPLAY_PORT]); 2080 2081 xs_udp_finish_connecting(xprt, sock); 2082 trace_rpc_socket_connect(xprt, sock, 0); 2083 status = 0; 2084 out: 2085 xprt_clear_connecting(xprt); 2086 xprt_unlock_connect(xprt, transport); 2087 xprt_wake_pending_tasks(xprt, status); 2088 current_restore_flags(pflags, PF_MEMALLOC); 2089 } 2090 2091 /** 2092 * xs_tcp_shutdown - gracefully shut down a TCP socket 2093 * @xprt: transport 2094 * 2095 * Initiates a graceful shutdown of the TCP socket by calling the 2096 * equivalent of shutdown(SHUT_RDWR); 2097 */ 2098 static void xs_tcp_shutdown(struct rpc_xprt *xprt) 2099 { 2100 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2101 struct socket *sock = transport->sock; 2102 int skst = transport->inet ? 
transport->inet->sk_state : TCP_CLOSE; 2103 2104 if (sock == NULL) 2105 return; 2106 if (!xprt->reuseport) { 2107 xs_close(xprt); 2108 return; 2109 } 2110 switch (skst) { 2111 case TCP_FIN_WAIT1: 2112 case TCP_FIN_WAIT2: 2113 break; 2114 case TCP_ESTABLISHED: 2115 case TCP_CLOSE_WAIT: 2116 kernel_sock_shutdown(sock, SHUT_RDWR); 2117 trace_rpc_socket_shutdown(xprt, sock); 2118 break; 2119 default: 2120 xs_reset_transport(transport); 2121 } 2122 } 2123 2124 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 2125 struct socket *sock) 2126 { 2127 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2128 unsigned int keepidle; 2129 unsigned int keepcnt; 2130 unsigned int timeo; 2131 2132 spin_lock(&xprt->transport_lock); 2133 keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); 2134 keepcnt = xprt->timeout->to_retries + 1; 2135 timeo = jiffies_to_msecs(xprt->timeout->to_initval) * 2136 (xprt->timeout->to_retries + 1); 2137 clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); 2138 spin_unlock(&xprt->transport_lock); 2139 2140 /* TCP Keepalive options */ 2141 sock_set_keepalive(sock->sk); 2142 tcp_sock_set_keepidle(sock->sk, keepidle); 2143 tcp_sock_set_keepintvl(sock->sk, keepidle); 2144 tcp_sock_set_keepcnt(sock->sk, keepcnt); 2145 2146 /* TCP user timeout (see RFC5482) */ 2147 tcp_sock_set_user_timeout(sock->sk, timeo); 2148 } 2149 2150 static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, 2151 unsigned long connect_timeout, 2152 unsigned long reconnect_timeout) 2153 { 2154 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2155 struct rpc_timeout to; 2156 unsigned long initval; 2157 2158 spin_lock(&xprt->transport_lock); 2159 if (reconnect_timeout < xprt->max_reconnect_timeout) 2160 xprt->max_reconnect_timeout = reconnect_timeout; 2161 if (connect_timeout < xprt->connect_timeout) { 2162 memcpy(&to, xprt->timeout, sizeof(to)); 2163 initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); 2164 /* 
Arbitrary lower limit */ 2165 if (initval < XS_TCP_INIT_REEST_TO << 1) 2166 initval = XS_TCP_INIT_REEST_TO << 1; 2167 to.to_initval = initval; 2168 to.to_maxval = initval; 2169 memcpy(&transport->tcp_timeout, &to, 2170 sizeof(transport->tcp_timeout)); 2171 xprt->timeout = &transport->tcp_timeout; 2172 xprt->connect_timeout = connect_timeout; 2173 } 2174 set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); 2175 spin_unlock(&xprt->transport_lock); 2176 } 2177 2178 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 2179 { 2180 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2181 2182 if (!transport->inet) { 2183 struct sock *sk = sock->sk; 2184 2185 /* Avoid temporary address, they are bad for long-lived 2186 * connections such as NFS mounts. 2187 * RFC4941, section 3.6 suggests that: 2188 * Individual applications, which have specific 2189 * knowledge about the normal duration of connections, 2190 * MAY override this as appropriate. 2191 */ 2192 if (xs_addr(xprt)->sa_family == PF_INET6) { 2193 ip6_sock_set_addr_preferences(sk, 2194 IPV6_PREFER_SRC_PUBLIC); 2195 } 2196 2197 xs_tcp_set_socket_timeouts(xprt, sock); 2198 tcp_sock_set_nodelay(sk); 2199 2200 lock_sock(sk); 2201 2202 xs_save_old_callbacks(transport, sk); 2203 2204 sk->sk_user_data = xprt; 2205 sk->sk_data_ready = xs_data_ready; 2206 sk->sk_state_change = xs_tcp_state_change; 2207 sk->sk_write_space = xs_tcp_write_space; 2208 sk->sk_error_report = xs_error_report; 2209 2210 /* socket options */ 2211 sock_reset_flag(sk, SOCK_LINGER); 2212 2213 xprt_clear_connected(xprt); 2214 2215 /* Reset to new socket */ 2216 transport->sock = sock; 2217 transport->inet = sk; 2218 2219 release_sock(sk); 2220 } 2221 2222 if (!xprt_bound(xprt)) 2223 return -ENOTCONN; 2224 2225 xs_set_memalloc(xprt); 2226 2227 xs_stream_start_connect(transport); 2228 2229 /* Tell the socket layer to start connecting... 
*/ 2230 set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); 2231 return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); 2232 } 2233 2234 /** 2235 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint 2236 * @work: queued work item 2237 * 2238 * Invoked by a work queue tasklet. 2239 */ 2240 static void xs_tcp_setup_socket(struct work_struct *work) 2241 { 2242 struct sock_xprt *transport = 2243 container_of(work, struct sock_xprt, connect_worker.work); 2244 struct socket *sock = transport->sock; 2245 struct rpc_xprt *xprt = &transport->xprt; 2246 int status; 2247 unsigned int pflags = current->flags; 2248 2249 if (atomic_read(&xprt->swapper)) 2250 current->flags |= PF_MEMALLOC; 2251 2252 if (xprt_connected(xprt)) 2253 goto out; 2254 if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, 2255 &transport->sock_state) || 2256 !sock) { 2257 xs_reset_transport(transport); 2258 sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, 2259 SOCK_STREAM, IPPROTO_TCP, true); 2260 if (IS_ERR(sock)) { 2261 xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); 2262 goto out; 2263 } 2264 } 2265 2266 dprintk("RPC: worker connecting xprt %p via %s to " 2267 "%s (port %s)\n", xprt, 2268 xprt->address_strings[RPC_DISPLAY_PROTO], 2269 xprt->address_strings[RPC_DISPLAY_ADDR], 2270 xprt->address_strings[RPC_DISPLAY_PORT]); 2271 2272 status = xs_tcp_finish_connecting(xprt, sock); 2273 trace_rpc_socket_connect(xprt, sock, status); 2274 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 2275 xprt, -status, xprt_connected(xprt), 2276 sock->sk->sk_state); 2277 switch (status) { 2278 case 0: 2279 case -EINPROGRESS: 2280 /* SYN_SENT! */ 2281 set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); 2282 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2283 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 2284 fallthrough; 2285 case -EALREADY: 2286 goto out_unlock; 2287 case -EADDRNOTAVAIL: 2288 /* Source port number is unavailable. 
Try a new one! */ 2289 transport->srcport = 0; 2290 status = -EAGAIN; 2291 break; 2292 case -EINVAL: 2293 /* Happens, for instance, if the user specified a link 2294 * local IPv6 address without a scope-id. 2295 */ 2296 case -ECONNREFUSED: 2297 case -ECONNRESET: 2298 case -ENETDOWN: 2299 case -ENETUNREACH: 2300 case -EHOSTUNREACH: 2301 case -EADDRINUSE: 2302 case -ENOBUFS: 2303 break; 2304 default: 2305 printk("%s: connect returned unhandled error %d\n", 2306 __func__, status); 2307 status = -EAGAIN; 2308 } 2309 2310 /* xs_tcp_force_close() wakes tasks with a fixed error code. 2311 * We need to wake them first to ensure the correct error code. 2312 */ 2313 xprt_wake_pending_tasks(xprt, status); 2314 xs_tcp_force_close(xprt); 2315 out: 2316 xprt_clear_connecting(xprt); 2317 out_unlock: 2318 xprt_unlock_connect(xprt, transport); 2319 current_restore_flags(pflags, PF_MEMALLOC); 2320 } 2321 2322 /** 2323 * xs_connect - connect a socket to a remote endpoint 2324 * @xprt: pointer to transport structure 2325 * @task: address of RPC task that manages state of connect request 2326 * 2327 * TCP: If the remote end dropped the connection, delay reconnecting. 2328 * 2329 * UDP socket connects are synchronous, but we use a work queue anyway 2330 * to guarantee that even unprivileged user processes can set up a 2331 * socket on a privileged port. 2332 * 2333 * If a UDP socket connect fails, the delay behavior here prevents 2334 * retry floods (hard mounts). 
2335 */ 2336 static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) 2337 { 2338 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2339 unsigned long delay = 0; 2340 2341 WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); 2342 2343 if (transport->sock != NULL) { 2344 dprintk("RPC: xs_connect delayed xprt %p for %lu " 2345 "seconds\n", xprt, xprt->reestablish_timeout / HZ); 2346 2347 delay = xprt_reconnect_delay(xprt); 2348 xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); 2349 2350 } else 2351 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 2352 2353 queue_delayed_work(xprtiod_workqueue, 2354 &transport->connect_worker, 2355 delay); 2356 } 2357 2358 static void xs_wake_disconnect(struct sock_xprt *transport) 2359 { 2360 if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state)) 2361 xs_tcp_force_close(&transport->xprt); 2362 } 2363 2364 static void xs_wake_write(struct sock_xprt *transport) 2365 { 2366 if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state)) 2367 xprt_write_space(&transport->xprt); 2368 } 2369 2370 static void xs_wake_error(struct sock_xprt *transport) 2371 { 2372 int sockerr; 2373 2374 if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) 2375 return; 2376 mutex_lock(&transport->recv_mutex); 2377 if (transport->sock == NULL) 2378 goto out; 2379 if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) 2380 goto out; 2381 sockerr = xchg(&transport->xprt_err, 0); 2382 if (sockerr < 0) 2383 xprt_wake_pending_tasks(&transport->xprt, sockerr); 2384 out: 2385 mutex_unlock(&transport->recv_mutex); 2386 } 2387 2388 static void xs_wake_pending(struct sock_xprt *transport) 2389 { 2390 if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state)) 2391 xprt_wake_pending_tasks(&transport->xprt, -EAGAIN); 2392 } 2393 2394 static void xs_error_handle(struct work_struct *work) 2395 { 2396 struct sock_xprt *transport = container_of(work, 2397 struct 
sock_xprt, error_worker); 2398 2399 xs_wake_disconnect(transport); 2400 xs_wake_write(transport); 2401 xs_wake_error(transport); 2402 xs_wake_pending(transport); 2403 } 2404 2405 /** 2406 * xs_local_print_stats - display AF_LOCAL socket-specific stats 2407 * @xprt: rpc_xprt struct containing statistics 2408 * @seq: output file 2409 * 2410 */ 2411 static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 2412 { 2413 long idle_time = 0; 2414 2415 if (xprt_connected(xprt)) 2416 idle_time = (long)(jiffies - xprt->last_used) / HZ; 2417 2418 seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " 2419 "%llu %llu %lu %llu %llu\n", 2420 xprt->stat.bind_count, 2421 xprt->stat.connect_count, 2422 xprt->stat.connect_time / HZ, 2423 idle_time, 2424 xprt->stat.sends, 2425 xprt->stat.recvs, 2426 xprt->stat.bad_xids, 2427 xprt->stat.req_u, 2428 xprt->stat.bklog_u, 2429 xprt->stat.max_slots, 2430 xprt->stat.sending_u, 2431 xprt->stat.pending_u); 2432 } 2433 2434 /** 2435 * xs_udp_print_stats - display UDP socket-specific stats 2436 * @xprt: rpc_xprt struct containing statistics 2437 * @seq: output file 2438 * 2439 */ 2440 static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 2441 { 2442 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2443 2444 seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " 2445 "%lu %llu %llu\n", 2446 transport->srcport, 2447 xprt->stat.bind_count, 2448 xprt->stat.sends, 2449 xprt->stat.recvs, 2450 xprt->stat.bad_xids, 2451 xprt->stat.req_u, 2452 xprt->stat.bklog_u, 2453 xprt->stat.max_slots, 2454 xprt->stat.sending_u, 2455 xprt->stat.pending_u); 2456 } 2457 2458 /** 2459 * xs_tcp_print_stats - display TCP socket-specific stats 2460 * @xprt: rpc_xprt struct containing statistics 2461 * @seq: output file 2462 * 2463 */ 2464 static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 2465 { 2466 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 
xprt); 2467 long idle_time = 0; 2468 2469 if (xprt_connected(xprt)) 2470 idle_time = (long)(jiffies - xprt->last_used) / HZ; 2471 2472 seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " 2473 "%llu %llu %lu %llu %llu\n", 2474 transport->srcport, 2475 xprt->stat.bind_count, 2476 xprt->stat.connect_count, 2477 xprt->stat.connect_time / HZ, 2478 idle_time, 2479 xprt->stat.sends, 2480 xprt->stat.recvs, 2481 xprt->stat.bad_xids, 2482 xprt->stat.req_u, 2483 xprt->stat.bklog_u, 2484 xprt->stat.max_slots, 2485 xprt->stat.sending_u, 2486 xprt->stat.pending_u); 2487 } 2488 2489 /* 2490 * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason 2491 * we allocate pages instead doing a kmalloc like rpc_malloc is because we want 2492 * to use the server side send routines. 2493 */ 2494 static int bc_malloc(struct rpc_task *task) 2495 { 2496 struct rpc_rqst *rqst = task->tk_rqstp; 2497 size_t size = rqst->rq_callsize; 2498 struct page *page; 2499 struct rpc_buffer *buf; 2500 2501 if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) { 2502 WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n", 2503 size); 2504 return -EINVAL; 2505 } 2506 2507 page = alloc_page(GFP_KERNEL); 2508 if (!page) 2509 return -ENOMEM; 2510 2511 buf = page_address(page); 2512 buf->len = PAGE_SIZE; 2513 2514 rqst->rq_buffer = buf->data; 2515 rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; 2516 return 0; 2517 } 2518 2519 /* 2520 * Free the space allocated in the bc_alloc routine 2521 */ 2522 static void bc_free(struct rpc_task *task) 2523 { 2524 void *buffer = task->tk_rqstp->rq_buffer; 2525 struct rpc_buffer *buf; 2526 2527 buf = container_of(buffer, struct rpc_buffer, data); 2528 free_page((unsigned long)buf); 2529 } 2530 2531 static int bc_sendto(struct rpc_rqst *req) 2532 { 2533 struct xdr_buf *xdr = &req->rq_snd_buf; 2534 struct sock_xprt *transport = 2535 container_of(req->rq_xprt, struct sock_xprt, xprt); 2536 struct msghdr msg = { 2537 .msg_flags = 
0, 2538 }; 2539 rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | 2540 (u32)xdr->len); 2541 unsigned int sent = 0; 2542 int err; 2543 2544 req->rq_xtime = ktime_get(); 2545 err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); 2546 xdr_free_bvec(xdr); 2547 if (err < 0 || sent != (xdr->len + sizeof(marker))) 2548 return -EAGAIN; 2549 return sent; 2550 } 2551 2552 /** 2553 * bc_send_request - Send a backchannel Call on a TCP socket 2554 * @req: rpc_rqst containing Call message to be sent 2555 * 2556 * xpt_mutex ensures @rqstp's whole message is written to the socket 2557 * without interruption. 2558 * 2559 * Return values: 2560 * %0 if the message was sent successfully 2561 * %ENOTCONN if the message was not sent 2562 */ 2563 static int bc_send_request(struct rpc_rqst *req) 2564 { 2565 struct svc_xprt *xprt; 2566 int len; 2567 2568 /* 2569 * Get the server socket associated with this callback xprt 2570 */ 2571 xprt = req->rq_xprt->bc_xprt; 2572 2573 /* 2574 * Grab the mutex to serialize data as the connection is shared 2575 * with the fore channel 2576 */ 2577 mutex_lock(&xprt->xpt_mutex); 2578 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) 2579 len = -ENOTCONN; 2580 else 2581 len = bc_sendto(req); 2582 mutex_unlock(&xprt->xpt_mutex); 2583 2584 if (len > 0) 2585 len = 0; 2586 2587 return len; 2588 } 2589 2590 /* 2591 * The close routine. Since this is client initiated, we do nothing 2592 */ 2593 2594 static void bc_close(struct rpc_xprt *xprt) 2595 { 2596 xprt_disconnect_done(xprt); 2597 } 2598 2599 /* 2600 * The xprt destroy routine. 
Again, because this connection is client
 * initiated, no socket is closed by this routine: it calls xs_xprt_free()
 * on the transport and drops a reference on this module.
 */

static void bc_destroy(struct rpc_xprt *xprt)
{
	dprintk("RPC: bc_destroy xprt %p\n", xprt);

	xs_xprt_free(xprt);
	module_put(THIS_MODULE);
}

/*
 * The rpc_xprt_ops for AF_LOCAL transports
 */
static const struct rpc_xprt_ops xs_local_ops = {
	.reserve_xprt		= xprt_reserve_xprt,
	.release_xprt		= xprt_release_xprt,
	.alloc_slot		= xprt_alloc_slot,
	.free_slot		= xprt_free_slot,
	.rpcbind		= xs_local_rpcbind,
	.set_port		= xs_local_set_port,
	.connect		= xs_local_connect,
	.buf_alloc		= rpc_malloc,
	.buf_free		= rpc_free,
	.prepare_request	= xs_stream_prepare_request,
	.send_request		= xs_local_send_request,
	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
	.close			= xs_close,
	.destroy		= xs_destroy,
	.print_stats		= xs_local_print_stats,
	.enable_swap		= xs_enable_swap,
	.disable_swap		= xs_disable_swap,
};

/*
 * The rpc_xprt_ops for UDP transports.  Unlike the stream transports this
 * table uses the congestion-controlled (_cong) reserve/release helpers and
 * supplies .timer and .set_buffer_size methods.
 */
static const struct rpc_xprt_ops xs_udp_ops = {
	.set_buffer_size	= xs_udp_set_buffer_size,
	.reserve_xprt		= xprt_reserve_xprt_cong,
	.release_xprt		= xprt_release_xprt_cong,
	.alloc_slot		= xprt_alloc_slot,
	.free_slot		= xprt_free_slot,
	.rpcbind		= rpcb_getport_async,
	.set_port		= xs_set_port,
	.connect		= xs_connect,
	.buf_alloc		= rpc_malloc,
	.buf_free		= rpc_free,
	.send_request		= xs_udp_send_request,
	.wait_for_reply_request	= xprt_wait_for_reply_request_rtt,
	.timer			= xs_udp_timer,
	.release_request	= xprt_release_rqst_cong,
	.close			= xs_close,
	.destroy		= xs_destroy,
	.print_stats		= xs_udp_print_stats,
	.enable_swap		= xs_enable_swap,
	.disable_swap		= xs_disable_swap,
	.inject_disconnect	= xs_inject_disconnect,
};

/*
 * The rpc_xprt_ops for TCP transports.  The bc_* methods are only present
 * when CONFIG_SUNRPC_BACKCHANNEL is enabled.
 */
static const struct rpc_xprt_ops xs_tcp_ops = {
	.reserve_xprt		= xprt_reserve_xprt,
	.release_xprt		= xprt_release_xprt,
	.alloc_slot		= xprt_alloc_slot,
	.free_slot		= xprt_free_slot,
	.rpcbind		= rpcb_getport_async,
	.set_port		= xs_set_port,
	.connect		= xs_connect,
	.buf_alloc		= rpc_malloc,
	.buf_free		= rpc_free,
	.prepare_request	= xs_stream_prepare_request,
	.send_request		= xs_tcp_send_request,
	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
	.close			= xs_tcp_shutdown,
	.destroy		= xs_destroy,
	.set_connect_timeout	= xs_tcp_set_connect_timeout,
	.print_stats		= xs_tcp_print_stats,
	.enable_swap		= xs_enable_swap,
	.disable_swap		= xs_disable_swap,
	.inject_disconnect	= xs_inject_disconnect,
#ifdef CONFIG_SUNRPC_BACKCHANNEL
	.bc_setup		= xprt_setup_bc,
	.bc_maxpayload		= xs_tcp_bc_maxpayload,
	.bc_num_slots		= xprt_bc_max_slots,
	.bc_free_rqst		= xprt_free_bc_rqst,
	.bc_destroy		= xprt_destroy_bc,
#endif
};

/*
 * The rpc_xprt_ops for the server backchannel.  No .connect, .rpcbind or
 * .set_port methods are provided; buffers come from bc_malloc/bc_free and
 * requests are sent with bc_send_request.
 */

static const struct rpc_xprt_ops bc_tcp_ops = {
	.reserve_xprt		= xprt_reserve_xprt,
	.release_xprt		= xprt_release_xprt,
	.alloc_slot		= xprt_alloc_slot,
	.free_slot		= xprt_free_slot,
	.buf_alloc		= bc_malloc,
	.buf_free		= bc_free,
	.send_request		= bc_send_request,
	.wait_for_reply_request	= xprt_wait_for_reply_request_def,
	.close			= bc_close,
	.destroy		= bc_destroy,
	.print_stats		= xs_tcp_print_stats,
	.enable_swap		= xs_enable_swap,
	.disable_swap		= xs_disable_swap,
	.inject_disconnect	= xs_inject_disconnect,
};

/*
 * Fill @sap with the wildcard ("any") address for @family.
 *
 * AF_INET and AF_INET6 get INADDR_ANY / IN6ADDR_ANY_INIT; AF_LOCAL leaves
 * @sap untouched.  The port field of the copied template is zero.
 *
 * Returns 0 on success or -EAFNOSUPPORT for any other family.
 */
static int xs_init_anyaddr(const int family, struct sockaddr *sap)
{
	static const struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
	};
	static const struct sockaddr_in6 sin6 = {
		.sin6_family		= AF_INET6,
		.sin6_addr		= IN6ADDR_ANY_INIT,
	};

	switch (family) {
	case AF_LOCAL:
		break;
	case AF_INET:
		memcpy(sap, &sin, sizeof(sin));
		break;
	case AF_INET6:
		memcpy(sap, &sin6, sizeof(sin6));
		break;
	default:
		dprintk("RPC: %s: Bad address family\n", __func__);
		return -EAFNOSUPPORT;
	}
	return 0;
}

/*
 * Allocate an rpc_xprt embedded in a struct sock_xprt and record its
 * addresses.
 *
 * The destination address is copied from @args.  The source address is
 * copied from args->srcaddr when given, otherwise it is set to the
 * wildcard address of the destination's family.
 *
 * Returns the new transport, or an ERR_PTR: -EBADF if args->addrlen is
 * larger than the transport's address field, -ENOMEM if allocation fails,
 * or the error from xs_init_anyaddr().
 */
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
				      unsigned int slot_table_size,
				      unsigned int max_slot_table_size)
{
	struct rpc_xprt *xprt;
	struct sock_xprt *new;

	/* sizeof does not evaluate xprt, so using it unassigned is fine */
	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: xs_setup_xprt: address too large\n");
		return ERR_PTR(-EBADF);
	}

	xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
			max_slot_table_size);
	if (xprt == NULL) {
		dprintk("RPC: xs_setup_xprt: couldn't allocate "
				"rpc_xprt\n");
		return ERR_PTR(-ENOMEM);
	}

	new = container_of(xprt, struct sock_xprt, xprt);
	mutex_init(&new->recv_mutex);
	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
	xprt->addrlen = args->addrlen;
	if (args->srcaddr)
		memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
	else {
		int err;
		err = xs_init_anyaddr(args->dstaddr->sa_family,
					(struct sockaddr *)&new->srcaddr);
		if (err != 0) {
			xprt_free(xprt);
			return ERR_PTR(err);
		}
	}

	return xprt;
}

/* Default timeout for AF_LOCAL transports: fixed 10 * HZ, 2 retries */
static const struct rpc_timeout xs_local_default_timeout = {
	.to_initval = 10 * HZ,
	.to_maxval = 10 * HZ,
	.to_retries = 2,
};

/**
 * xs_setup_local - Set up transport to use an AF_LOCAL socket
 * @args: rpc transport creation arguments
 *
 * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
 *
 * The destination path must start with '/'.  The socket is set up
 * synchronously here via xs_local_setup_socket().
 *
 * Returns the new transport with a module reference held, or an ERR_PTR
 * after the transport has been freed.
 */
static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
{
	struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
	struct sock_xprt *transport;
	struct rpc_xprt *xprt;
	struct rpc_xprt *ret;

	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
			xprt_max_tcp_slot_table_entries);
	if (IS_ERR(xprt))
		return xprt;
	transport = container_of(xprt, struct sock_xprt, xprt);

	xprt->prot = 0;
	xprt->xprt_class = &xs_local_transport;
	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;

	xprt->bind_timeout = XS_BIND_TO;
	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
	xprt->idle_timeout = XS_IDLE_DISC_TO;

	xprt->ops = &xs_local_ops;
	xprt->timeout = &xs_local_default_timeout;

	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
	INIT_WORK(&transport->error_worker, xs_error_handle);
	INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);

	switch (sun->sun_family) {
	case AF_LOCAL:
		if (sun->sun_path[0] != '/') {
			dprintk("RPC: bad AF_LOCAL address: %s\n",
					sun->sun_path);
			ret = ERR_PTR(-EINVAL);
			goto out_err;
		}
		xprt_set_bound(xprt);
		xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
		/* ERR_PTR(0) is NULL, so ret is non-NULL only on failure */
		ret = ERR_PTR(xs_local_setup_socket(transport));
		if (ret)
			goto out_err;
		break;
	default:
		ret = ERR_PTR(-EAFNOSUPPORT);
		goto out_err;
	}

	dprintk("RPC: set up xprt to %s via AF_LOCAL\n",
			xprt->address_strings[RPC_DISPLAY_ADDR]);

	if (try_module_get(THIS_MODULE))
		return xprt;
	ret = ERR_PTR(-EINVAL);
out_err:
	xs_xprt_free(xprt);
	return ret;
}

/*
 * Default timeout for UDP transports: starts at 5 * HZ, grows by 5 * HZ
 * up to 30 * HZ, 5 retries
 */
static const struct rpc_timeout xs_udp_default_timeout = {
	.to_initval = 5 * HZ,
	.to_maxval = 30 * HZ,
	.to_increment = 5 * HZ,
	.to_retries = 5,
};

/**
 * xs_setup_udp - Set up transport to use a UDP socket
 * @args: rpc transport creation arguments
 *
 * The transport is marked bound only when the destination address
 * carries a non-zero port.
 *
 * Returns the new transport with a module reference held, or an ERR_PTR
 * after the transport has been freed.
 */
static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
{
	struct sockaddr *addr = args->dstaddr;
	struct rpc_xprt *xprt;
	struct sock_xprt *transport;
	struct rpc_xprt *ret;

	/* Same value for initial and maximum slot table size */
	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
			xprt_udp_slot_table_entries);
	if (IS_ERR(xprt))
		return xprt;
	transport = container_of(xprt, struct sock_xprt, xprt);

	xprt->prot = IPPROTO_UDP;
	xprt->xprt_class = &xs_udp_transport;
	/* XXX: header size can vary due to auth type, IPv6, etc. */
	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);

	xprt->bind_timeout = XS_BIND_TO;
	xprt->reestablish_timeout = XS_UDP_REEST_TO;
	xprt->idle_timeout = XS_IDLE_DISC_TO;

	xprt->ops = &xs_udp_ops;

	xprt->timeout = &xs_udp_default_timeout;

	INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
	INIT_WORK(&transport->error_worker, xs_error_handle);
	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);

	switch (addr->sa_family) {
	case AF_INET:
		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
			xprt_set_bound(xprt);

		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
		break;
	case AF_INET6:
		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
			xprt_set_bound(xprt);

		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
		break;
	default:
		ret = ERR_PTR(-EAFNOSUPPORT);
		goto out_err;
	}

	if (xprt_bound(xprt))
		dprintk("RPC: set up xprt to %s (port %s) via %s\n",
				xprt->address_strings[RPC_DISPLAY_ADDR],
				xprt->address_strings[RPC_DISPLAY_PORT],
				xprt->address_strings[RPC_DISPLAY_PROTO]);
	else
		dprintk("RPC: set up xprt to %s (autobind) via %s\n",
				xprt->address_strings[RPC_DISPLAY_ADDR],
				xprt->address_strings[RPC_DISPLAY_PROTO]);

	if (try_module_get(THIS_MODULE))
		return xprt;
	ret = ERR_PTR(-EINVAL);
out_err:
	xs_xprt_free(xprt);
	return ret;
}

/* Default timeout for TCP transports: fixed 60 * HZ, 2 retries */
static const struct rpc_timeout xs_tcp_default_timeout = {
	.to_initval = 60 * HZ,
	.to_maxval = 60 * HZ,
	.to_retries = 2,
};

/**
 * xs_setup_tcp - Set up transport to use a TCP socket
 * @args: rpc transport
creation arguments
 *
 * The transport is marked bound only when the destination address
 * carries a non-zero port.
 *
 * Returns the new transport with a module reference held, or an ERR_PTR
 * after the transport has been freed.
 */
static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
{
	struct sockaddr *addr = args->dstaddr;
	struct rpc_xprt *xprt;
	struct sock_xprt *transport;
	struct rpc_xprt *ret;
	unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;

	/* Callers may ask for the slot table to grow up to the hard limit */
	if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
		max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;

	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
			max_slot_table_size);
	if (IS_ERR(xprt))
		return xprt;
	transport = container_of(xprt, struct sock_xprt, xprt);

	xprt->prot = IPPROTO_TCP;
	xprt->xprt_class = &xs_tcp_transport;
	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;

	xprt->bind_timeout = XS_BIND_TO;
	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
	xprt->idle_timeout = XS_IDLE_DISC_TO;

	xprt->ops = &xs_tcp_ops;
	xprt->timeout = &xs_tcp_default_timeout;

	/* Derive the reconnect and connect limits from the default timeout */
	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
	xprt->connect_timeout = xprt->timeout->to_initval *
		(xprt->timeout->to_retries + 1);

	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
	INIT_WORK(&transport->error_worker, xs_error_handle);
	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);

	switch (addr->sa_family) {
	case AF_INET:
		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
			xprt_set_bound(xprt);

		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
		break;
	case AF_INET6:
		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
			xprt_set_bound(xprt);

		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
		break;
	default:
		ret = ERR_PTR(-EAFNOSUPPORT);
		goto out_err;
	}

	if (xprt_bound(xprt))
		dprintk("RPC: set up xprt to %s (port %s) via %s\n",
				xprt->address_strings[RPC_DISPLAY_ADDR],
				xprt->address_strings[RPC_DISPLAY_PORT],
				xprt->address_strings[RPC_DISPLAY_PROTO]);
	else
		dprintk("RPC: set up xprt to %s (autobind) via %s\n",
				xprt->address_strings[RPC_DISPLAY_ADDR],
				xprt->address_strings[RPC_DISPLAY_PROTO]);

	if (try_module_get(THIS_MODULE))
		return xprt;
	ret = ERR_PTR(-EINVAL);
out_err:
	xs_xprt_free(xprt);
	return ret;
}

/**
 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
 * @args: rpc transport creation arguments
 *
 * No socket is created: the transport borrows the socket of the server
 * transport in args->bc_xprt and is marked bound and connected right away.
 *
 * Returns the new transport with a module reference held, or an ERR_PTR
 * after the transport has been freed.
 */
static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
{
	struct sockaddr *addr = args->dstaddr;
	struct rpc_xprt *xprt;
	struct sock_xprt *transport;
	struct svc_sock *bc_sock;
	struct rpc_xprt *ret;

	/* Same value for initial and maximum slot table size */
	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
			xprt_tcp_slot_table_entries);
	if (IS_ERR(xprt))
		return xprt;
	transport = container_of(xprt, struct sock_xprt, xprt);

	xprt->prot = IPPROTO_TCP;
	xprt->xprt_class = &xs_bc_tcp_transport;
	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
	xprt->timeout = &xs_tcp_default_timeout;

	/* backchannel */
	xprt_set_bound(xprt);
	xprt->bind_timeout = 0;
	xprt->reestablish_timeout = 0;
	xprt->idle_timeout = 0;

	xprt->ops = &bc_tcp_ops;

	switch (addr->sa_family) {
	case AF_INET:
		xs_format_peer_addresses(xprt, "tcp",
					 RPCBIND_NETID_TCP);
		break;
	case AF_INET6:
		xs_format_peer_addresses(xprt, "tcp",
				   RPCBIND_NETID_TCP6);
		break;
	default:
		ret = ERR_PTR(-EAFNOSUPPORT);
		goto out_err;
	}

	dprintk("RPC: set up xprt to %s (port %s) via %s\n",
			xprt->address_strings[RPC_DISPLAY_ADDR],
			xprt->address_strings[RPC_DISPLAY_PORT],
			xprt->address_strings[RPC_DISPLAY_PROTO]);

	/*
	 * Once we've associated a backchannel xprt with a connection,
	 * we want to keep it around as long as the connection lasts,
	 * in case we need to start using it for a backchannel again;
	 * this reference won't be dropped until bc_xprt is destroyed.
	 */
	xprt_get(xprt);
	args->bc_xprt->xpt_bc_xprt = xprt;
	xprt->bc_xprt = args->bc_xprt;
	bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
	transport->sock = bc_sock->sk_sock;
	transport->inet = bc_sock->sk_sk;

	/*
	 * Since we don't want connections for the backchannel, we set
	 * the xprt status to connected
	 */
	xprt_set_connected(xprt);

	if (try_module_get(THIS_MODULE))
		return xprt;

	/*
	 * Undo the association made above and drop the extra reference
	 * before the transport itself is freed.
	 */
	args->bc_xprt->xpt_bc_xprt = NULL;
	args->bc_xprt->xpt_bc_xps = NULL;
	xprt_put(xprt);
	ret = ERR_PTR(-EINVAL);
out_err:
	xs_xprt_free(xprt);
	return ret;
}

/* Transport class for AF_LOCAL sockets; declares no netids */
static struct xprt_class	xs_local_transport = {
	.list		= LIST_HEAD_INIT(xs_local_transport.list),
	.name		= "named UNIX socket",
	.owner		= THIS_MODULE,
	.ident		= XPRT_TRANSPORT_LOCAL,
	.setup		= xs_setup_local,
	.netid		= { "" },
};

/* Transport class for UDP, netids "udp" and "udp6" */
static struct xprt_class	xs_udp_transport = {
	.list		= LIST_HEAD_INIT(xs_udp_transport.list),
	.name		= "udp",
	.owner		= THIS_MODULE,
	.ident		= XPRT_TRANSPORT_UDP,
	.setup		= xs_setup_udp,
	.netid		= { "udp", "udp6", "" },
};

/* Transport class for TCP, netids "tcp" and "tcp6" */
static struct xprt_class	xs_tcp_transport = {
	.list		= LIST_HEAD_INIT(xs_tcp_transport.list),
	.name		= "tcp",
	.owner		= THIS_MODULE,
	.ident		= XPRT_TRANSPORT_TCP,
	.setup		= xs_setup_tcp,
	.netid		= { "tcp", "tcp6", "" },
};

/* Transport class for the TCP backchannel; declares no netids */
static struct xprt_class	xs_bc_tcp_transport = {
	.list		= LIST_HEAD_INIT(xs_bc_tcp_transport.list),
	.name		= "tcp NFSv4.1 backchannel",
	.owner		= THIS_MODULE,
	.ident		= XPRT_TRANSPORT_BC_TCP,
	.setup		= xs_setup_bc_tcp,
	.netid		= { "" },
};

/**
 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
 *
*/ 3124 int init_socket_xprt(void) 3125 { 3126 if (!sunrpc_table_header) 3127 sunrpc_table_header = register_sysctl_table(sunrpc_table); 3128 3129 xprt_register_transport(&xs_local_transport); 3130 xprt_register_transport(&xs_udp_transport); 3131 xprt_register_transport(&xs_tcp_transport); 3132 xprt_register_transport(&xs_bc_tcp_transport); 3133 3134 return 0; 3135 } 3136 3137 /** 3138 * cleanup_socket_xprt - remove xprtsock's sysctls, unregister 3139 * 3140 */ 3141 void cleanup_socket_xprt(void) 3142 { 3143 if (sunrpc_table_header) { 3144 unregister_sysctl_table(sunrpc_table_header); 3145 sunrpc_table_header = NULL; 3146 } 3147 3148 xprt_unregister_transport(&xs_local_transport); 3149 xprt_unregister_transport(&xs_udp_transport); 3150 xprt_unregister_transport(&xs_tcp_transport); 3151 xprt_unregister_transport(&xs_bc_tcp_transport); 3152 } 3153 3154 static int param_set_portnr(const char *val, const struct kernel_param *kp) 3155 { 3156 return param_set_uint_minmax(val, kp, 3157 RPC_MIN_RESVPORT, 3158 RPC_MAX_RESVPORT); 3159 } 3160 3161 static const struct kernel_param_ops param_ops_portnr = { 3162 .set = param_set_portnr, 3163 .get = param_get_uint, 3164 }; 3165 3166 #define param_check_portnr(name, p) \ 3167 __param_check(name, p, unsigned int); 3168 3169 module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); 3170 module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); 3171 3172 static int param_set_slot_table_size(const char *val, 3173 const struct kernel_param *kp) 3174 { 3175 return param_set_uint_minmax(val, kp, 3176 RPC_MIN_SLOT_TABLE, 3177 RPC_MAX_SLOT_TABLE); 3178 } 3179 3180 static const struct kernel_param_ops param_ops_slot_table_size = { 3181 .set = param_set_slot_table_size, 3182 .get = param_get_uint, 3183 }; 3184 3185 #define param_check_slot_table_size(name, p) \ 3186 __param_check(name, p, unsigned int); 3187 3188 static int param_set_max_slot_table_size(const char *val, 3189 const struct kernel_param *kp) 3190 { 3191 return 
param_set_uint_minmax(val, kp, 3192 RPC_MIN_SLOT_TABLE, 3193 RPC_MAX_SLOT_TABLE_LIMIT); 3194 } 3195 3196 static const struct kernel_param_ops param_ops_max_slot_table_size = { 3197 .set = param_set_max_slot_table_size, 3198 .get = param_get_uint, 3199 }; 3200 3201 #define param_check_max_slot_table_size(name, p) \ 3202 __param_check(name, p, unsigned int); 3203 3204 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, 3205 slot_table_size, 0644); 3206 module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, 3207 max_slot_table_size, 0644); 3208 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, 3209 slot_table_size, 0644); 3210