11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/net/sunrpc/xprt.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This is a generic RPC call interface supporting congestion avoidance, 51da177e4SLinus Torvalds * and asynchronous calls. 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * The interface works like this: 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * - When a process places a call, it allocates a request slot if 101da177e4SLinus Torvalds * one is available. Otherwise, it sleeps on the backlog queue 111da177e4SLinus Torvalds * (xprt_reserve). 121da177e4SLinus Torvalds * - Next, the caller puts together the RPC message, stuffs it into 131da177e4SLinus Torvalds * the request struct, and calls xprt_call(). 141da177e4SLinus Torvalds * - xprt_call transmits the message and installs the caller on the 151da177e4SLinus Torvalds * socket's wait list. At the same time, it installs a timer that 161da177e4SLinus Torvalds * is run after the packet's timeout has expired. 171da177e4SLinus Torvalds * - When a packet arrives, the data_ready handler walks the list of 181da177e4SLinus Torvalds * pending requests for that socket. If a matching XID is found, the 191da177e4SLinus Torvalds * caller is woken up, and the timer removed. 201da177e4SLinus Torvalds * - When no reply arrives within the timeout interval, the timer is 211da177e4SLinus Torvalds * fired by the kernel and runs xprt_timer(). It either adjusts the 221da177e4SLinus Torvalds * timeout values (minor timeout) or wakes up the caller with a status 231da177e4SLinus Torvalds * of -ETIMEDOUT. 241da177e4SLinus Torvalds * - When the caller receives a notification from RPC that a reply arrived, 251da177e4SLinus Torvalds * it should release the RPC slot, and process the reply. 261da177e4SLinus Torvalds * If the call timed out, it may choose to retry the operation by 271da177e4SLinus Torvalds * adjusting the initial timeout value, and simply calling rpc_call 281da177e4SLinus Torvalds * again. 291da177e4SLinus Torvalds * 301da177e4SLinus Torvalds * Support for async RPC is done through a set of RPC-specific scheduling 311da177e4SLinus Torvalds * primitives that `transparently' work for processes as well as async 321da177e4SLinus Torvalds * tasks that rely on callbacks. 331da177e4SLinus Torvalds * 341da177e4SLinus Torvalds * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> 351da177e4SLinus Torvalds * 361da177e4SLinus Torvalds * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> 371da177e4SLinus Torvalds * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> 381da177e4SLinus Torvalds * TCP NFS related read + write fixes 391da177e4SLinus Torvalds * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> 401da177e4SLinus Torvalds * 411da177e4SLinus Torvalds * Rewrite of larges part of the code in order to stabilize TCP stuff. 421da177e4SLinus Torvalds * Fix behaviour when socket buffer is full. 431da177e4SLinus Torvalds * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> 441da177e4SLinus Torvalds */ 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds #include <linux/types.h> 471da177e4SLinus Torvalds #include <linux/slab.h> 481da177e4SLinus Torvalds #include <linux/capability.h> 491da177e4SLinus Torvalds #include <linux/sched.h> 501da177e4SLinus Torvalds #include <linux/errno.h> 511da177e4SLinus Torvalds #include <linux/socket.h> 521da177e4SLinus Torvalds #include <linux/in.h> 531da177e4SLinus Torvalds #include <linux/net.h> 541da177e4SLinus Torvalds #include <linux/mm.h> 551da177e4SLinus Torvalds #include <linux/udp.h> 561da177e4SLinus Torvalds #include <linux/tcp.h> 571da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h> 581da177e4SLinus Torvalds #include <linux/file.h> 591da177e4SLinus Torvalds #include <linux/workqueue.h> 601da177e4SLinus Torvalds #include <linux/random.h> 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds #include <net/sock.h> 631da177e4SLinus Torvalds #include <net/checksum.h> 641da177e4SLinus Torvalds #include <net/udp.h> 651da177e4SLinus Torvalds #include <net/tcp.h> 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds /* 681da177e4SLinus Torvalds * Local variables 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds #ifdef RPC_DEBUG 721da177e4SLinus Torvalds # undef RPC_DEBUG_DATA 731da177e4SLinus Torvalds # define RPCDBG_FACILITY RPCDBG_XPRT 741da177e4SLinus Torvalds #endif 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds #define XPRT_MAX_BACKOFF (8) 771da177e4SLinus Torvalds #define XPRT_IDLE_TIMEOUT (5*60*HZ) 781da177e4SLinus Torvalds #define XPRT_MAX_RESVPORT (800) 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds /* 811da177e4SLinus Torvalds * Local functions 821da177e4SLinus Torvalds */ 831da177e4SLinus Torvalds static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 841da177e4SLinus Torvalds static inline void do_xprt_reserve(struct rpc_task *); 851da177e4SLinus Torvalds static void xprt_disconnect(struct rpc_xprt *); 861da177e4SLinus Torvalds static void xprt_connect_status(struct rpc_task *task); 871da177e4SLinus Torvalds static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, 881da177e4SLinus Torvalds struct rpc_timeout *to); 891da177e4SLinus Torvalds static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); 901da177e4SLinus Torvalds static void xprt_bind_socket(struct rpc_xprt *, struct socket *); 911da177e4SLinus Torvalds static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds static int xprt_clear_backlog(struct rpc_xprt *xprt); 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds #ifdef RPC_DEBUG_DATA 961da177e4SLinus Torvalds /* 971da177e4SLinus Torvalds * Print the buffer contents (first 128 bytes only--just enough for 981da177e4SLinus Torvalds * diropres return). 991da177e4SLinus Torvalds */ 1001da177e4SLinus Torvalds static void 1011da177e4SLinus Torvalds xprt_pktdump(char *msg, u32 *packet, unsigned int count) 1021da177e4SLinus Torvalds { 1031da177e4SLinus Torvalds u8 *buf = (u8 *) packet; 1041da177e4SLinus Torvalds int j; 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds dprintk("RPC: %s\n", msg); 1071da177e4SLinus Torvalds for (j = 0; j < count && j < 128; j += 4) { 1081da177e4SLinus Torvalds if (!(j & 31)) { 1091da177e4SLinus Torvalds if (j) 1101da177e4SLinus Torvalds dprintk("\n"); 1111da177e4SLinus Torvalds dprintk("0x%04x ", j); 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds dprintk("%02x%02x%02x%02x ", 1141da177e4SLinus Torvalds buf[j], buf[j+1], buf[j+2], buf[j+3]); 1151da177e4SLinus Torvalds } 1161da177e4SLinus Torvalds dprintk("\n"); 1171da177e4SLinus Torvalds } 1181da177e4SLinus Torvalds #else 1191da177e4SLinus Torvalds static inline void 1201da177e4SLinus Torvalds xprt_pktdump(char *msg, u32 *packet, unsigned int count) 1211da177e4SLinus Torvalds { 1221da177e4SLinus Torvalds /* NOP */ 1231da177e4SLinus Torvalds } 1241da177e4SLinus Torvalds #endif 1251da177e4SLinus Torvalds 1261da177e4SLinus Torvalds /* 1271da177e4SLinus Torvalds * Look up RPC transport given an INET socket 1281da177e4SLinus Torvalds */ 1291da177e4SLinus Torvalds static inline struct rpc_xprt * 1301da177e4SLinus Torvalds xprt_from_sock(struct sock *sk) 1311da177e4SLinus Torvalds { 1321da177e4SLinus Torvalds return (struct rpc_xprt *) sk->sk_user_data; 1331da177e4SLinus Torvalds } 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds /* 1361da177e4SLinus Torvalds * Serialize write access to sockets, in order to prevent different 1371da177e4SLinus Torvalds * requests from interfering with each other. 1381da177e4SLinus Torvalds * Also prevents TCP socket connects from colliding with writes. 1391da177e4SLinus Torvalds */ 1401da177e4SLinus Torvalds static int 1411da177e4SLinus Torvalds __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 1421da177e4SLinus Torvalds { 1431da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { 1461da177e4SLinus Torvalds if (task == xprt->snd_task) 1471da177e4SLinus Torvalds return 1; 1481da177e4SLinus Torvalds goto out_sleep; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds if (xprt->nocong || __xprt_get_cong(xprt, task)) { 1511da177e4SLinus Torvalds xprt->snd_task = task; 1521da177e4SLinus Torvalds if (req) { 1531da177e4SLinus Torvalds req->rq_bytes_sent = 0; 1541da177e4SLinus Torvalds req->rq_ntrans++; 1551da177e4SLinus Torvalds } 1561da177e4SLinus Torvalds return 1; 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds smp_mb__before_clear_bit(); 1591da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 1601da177e4SLinus Torvalds smp_mb__after_clear_bit(); 1611da177e4SLinus Torvalds out_sleep: 1621da177e4SLinus Torvalds dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); 1631da177e4SLinus Torvalds task->tk_timeout = 0; 1641da177e4SLinus Torvalds task->tk_status = -EAGAIN; 1651da177e4SLinus Torvalds if (req && req->rq_ntrans) 1661da177e4SLinus Torvalds rpc_sleep_on(&xprt->resend, task, NULL, NULL); 1671da177e4SLinus Torvalds else 1681da177e4SLinus Torvalds rpc_sleep_on(&xprt->sending, task, NULL, NULL); 1691da177e4SLinus Torvalds return 0; 1701da177e4SLinus Torvalds } 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds static inline int 1731da177e4SLinus Torvalds xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 1741da177e4SLinus Torvalds { 1751da177e4SLinus Torvalds int retval; 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 1781da177e4SLinus Torvalds retval = __xprt_lock_write(xprt, task); 1791da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 1801da177e4SLinus Torvalds return retval; 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds 1831da177e4SLinus Torvalds 1841da177e4SLinus Torvalds static void 1851da177e4SLinus Torvalds __xprt_lock_write_next(struct rpc_xprt *xprt) 1861da177e4SLinus Torvalds { 1871da177e4SLinus Torvalds struct rpc_task *task; 1881da177e4SLinus Torvalds 1891da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 1901da177e4SLinus Torvalds return; 1911da177e4SLinus Torvalds if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) 1921da177e4SLinus Torvalds goto out_unlock; 1931da177e4SLinus Torvalds task = rpc_wake_up_next(&xprt->resend); 1941da177e4SLinus Torvalds if (!task) { 1951da177e4SLinus Torvalds task = rpc_wake_up_next(&xprt->sending); 1961da177e4SLinus Torvalds if (!task) 1971da177e4SLinus Torvalds goto out_unlock; 1981da177e4SLinus Torvalds } 1991da177e4SLinus Torvalds if (xprt->nocong || __xprt_get_cong(xprt, task)) { 2001da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 2011da177e4SLinus Torvalds xprt->snd_task = task; 2021da177e4SLinus Torvalds if (req) { 2031da177e4SLinus Torvalds req->rq_bytes_sent = 0; 2041da177e4SLinus Torvalds req->rq_ntrans++; 2051da177e4SLinus Torvalds } 2061da177e4SLinus Torvalds return; 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds out_unlock: 2091da177e4SLinus Torvalds smp_mb__before_clear_bit(); 2101da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 2111da177e4SLinus Torvalds smp_mb__after_clear_bit(); 2121da177e4SLinus Torvalds } 2131da177e4SLinus Torvalds 2141da177e4SLinus Torvalds /* 2151da177e4SLinus Torvalds * Releases the socket for use by other requests. 2161da177e4SLinus Torvalds */ 2171da177e4SLinus Torvalds static void 2181da177e4SLinus Torvalds __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 2191da177e4SLinus Torvalds { 2201da177e4SLinus Torvalds if (xprt->snd_task == task) { 2211da177e4SLinus Torvalds xprt->snd_task = NULL; 2221da177e4SLinus Torvalds smp_mb__before_clear_bit(); 2231da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 2241da177e4SLinus Torvalds smp_mb__after_clear_bit(); 2251da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds } 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds static inline void 2301da177e4SLinus Torvalds xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 2311da177e4SLinus Torvalds { 2321da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 2331da177e4SLinus Torvalds __xprt_release_write(xprt, task); 2341da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 2351da177e4SLinus Torvalds } 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds /* 2381da177e4SLinus Torvalds * Write data to socket. 2391da177e4SLinus Torvalds */ 2401da177e4SLinus Torvalds static inline int 2411da177e4SLinus Torvalds xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) 2421da177e4SLinus Torvalds { 2431da177e4SLinus Torvalds struct socket *sock = xprt->sock; 2441da177e4SLinus Torvalds struct xdr_buf *xdr = &req->rq_snd_buf; 2451da177e4SLinus Torvalds struct sockaddr *addr = NULL; 2461da177e4SLinus Torvalds int addrlen = 0; 2471da177e4SLinus Torvalds unsigned int skip; 2481da177e4SLinus Torvalds int result; 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds if (!sock) 2511da177e4SLinus Torvalds return -ENOTCONN; 2521da177e4SLinus Torvalds 2531da177e4SLinus Torvalds xprt_pktdump("packet data:", 2541da177e4SLinus Torvalds req->rq_svec->iov_base, 2551da177e4SLinus Torvalds req->rq_svec->iov_len); 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds /* For UDP, we need to provide an address */ 2581da177e4SLinus Torvalds if (!xprt->stream) { 2591da177e4SLinus Torvalds addr = (struct sockaddr *) &xprt->addr; 2601da177e4SLinus Torvalds addrlen = sizeof(xprt->addr); 2611da177e4SLinus Torvalds } 2621da177e4SLinus Torvalds /* Dont repeat bytes */ 2631da177e4SLinus Torvalds skip = req->rq_bytes_sent; 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 2661da177e4SLinus Torvalds result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); 2671da177e4SLinus Torvalds 2681da177e4SLinus Torvalds dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds if (result >= 0) 2711da177e4SLinus Torvalds return result; 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds switch (result) { 2741da177e4SLinus Torvalds case -ECONNREFUSED: 2751da177e4SLinus Torvalds /* When the server has died, an ICMP port unreachable message 2761da177e4SLinus Torvalds * prompts ECONNREFUSED. 2771da177e4SLinus Torvalds */ 2781da177e4SLinus Torvalds case -EAGAIN: 2791da177e4SLinus Torvalds break; 2801da177e4SLinus Torvalds case -ECONNRESET: 2811da177e4SLinus Torvalds case -ENOTCONN: 2821da177e4SLinus Torvalds case -EPIPE: 2831da177e4SLinus Torvalds /* connection broken */ 2841da177e4SLinus Torvalds if (xprt->stream) 2851da177e4SLinus Torvalds result = -ENOTCONN; 2861da177e4SLinus Torvalds break; 2871da177e4SLinus Torvalds default: 2881da177e4SLinus Torvalds printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); 2891da177e4SLinus Torvalds } 2901da177e4SLinus Torvalds return result; 2911da177e4SLinus Torvalds } 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds /* 2941da177e4SLinus Torvalds * Van Jacobson congestion avoidance. Check if the congestion window 2951da177e4SLinus Torvalds * overflowed. Put the task to sleep if this is the case. 2961da177e4SLinus Torvalds */ 2971da177e4SLinus Torvalds static int 2981da177e4SLinus Torvalds __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) 2991da177e4SLinus Torvalds { 3001da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds if (req->rq_cong) 3031da177e4SLinus Torvalds return 1; 3041da177e4SLinus Torvalds dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n", 3051da177e4SLinus Torvalds task->tk_pid, xprt->cong, xprt->cwnd); 3061da177e4SLinus Torvalds if (RPCXPRT_CONGESTED(xprt)) 3071da177e4SLinus Torvalds return 0; 3081da177e4SLinus Torvalds req->rq_cong = 1; 3091da177e4SLinus Torvalds xprt->cong += RPC_CWNDSCALE; 3101da177e4SLinus Torvalds return 1; 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds 3131da177e4SLinus Torvalds /* 3141da177e4SLinus Torvalds * Adjust the congestion window, and wake up the next task 3151da177e4SLinus Torvalds * that has been sleeping due to congestion 3161da177e4SLinus Torvalds */ 3171da177e4SLinus Torvalds static void 3181da177e4SLinus Torvalds __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) 3191da177e4SLinus Torvalds { 3201da177e4SLinus Torvalds if (!req->rq_cong) 3211da177e4SLinus Torvalds return; 3221da177e4SLinus Torvalds req->rq_cong = 0; 3231da177e4SLinus Torvalds xprt->cong -= RPC_CWNDSCALE; 3241da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 3251da177e4SLinus Torvalds } 3261da177e4SLinus Torvalds 3271da177e4SLinus Torvalds /* 3281da177e4SLinus Torvalds * Adjust RPC congestion window 3291da177e4SLinus Torvalds * We use a time-smoothed congestion estimator to avoid heavy oscillation. 3301da177e4SLinus Torvalds */ 3311da177e4SLinus Torvalds static void 3321da177e4SLinus Torvalds xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) 3331da177e4SLinus Torvalds { 3341da177e4SLinus Torvalds unsigned long cwnd; 3351da177e4SLinus Torvalds 3361da177e4SLinus Torvalds cwnd = xprt->cwnd; 3371da177e4SLinus Torvalds if (result >= 0 && cwnd <= xprt->cong) { 3381da177e4SLinus Torvalds /* The (cwnd >> 1) term makes sure 3391da177e4SLinus Torvalds * the result gets rounded properly. */ 3401da177e4SLinus Torvalds cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; 3411da177e4SLinus Torvalds if (cwnd > RPC_MAXCWND(xprt)) 3421da177e4SLinus Torvalds cwnd = RPC_MAXCWND(xprt); 3431da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 3441da177e4SLinus Torvalds } else if (result == -ETIMEDOUT) { 3451da177e4SLinus Torvalds cwnd >>= 1; 3461da177e4SLinus Torvalds if (cwnd < RPC_CWNDSCALE) 3471da177e4SLinus Torvalds cwnd = RPC_CWNDSCALE; 3481da177e4SLinus Torvalds } 3491da177e4SLinus Torvalds dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", 3501da177e4SLinus Torvalds xprt->cong, xprt->cwnd, cwnd); 3511da177e4SLinus Torvalds xprt->cwnd = cwnd; 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds 3541da177e4SLinus Torvalds /* 3551da177e4SLinus Torvalds * Reset the major timeout value 3561da177e4SLinus Torvalds */ 3571da177e4SLinus Torvalds static void xprt_reset_majortimeo(struct rpc_rqst *req) 3581da177e4SLinus Torvalds { 3591da177e4SLinus Torvalds struct rpc_timeout *to = &req->rq_xprt->timeout; 3601da177e4SLinus Torvalds 3611da177e4SLinus Torvalds req->rq_majortimeo = req->rq_timeout; 3621da177e4SLinus Torvalds if (to->to_exponential) 3631da177e4SLinus Torvalds req->rq_majortimeo <<= to->to_retries; 3641da177e4SLinus Torvalds else 3651da177e4SLinus Torvalds req->rq_majortimeo += to->to_increment * to->to_retries; 3661da177e4SLinus Torvalds if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) 3671da177e4SLinus Torvalds req->rq_majortimeo = to->to_maxval; 3681da177e4SLinus Torvalds req->rq_majortimeo += jiffies; 3691da177e4SLinus Torvalds } 3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds /* 3721da177e4SLinus Torvalds * Adjust timeout values etc for next retransmit 3731da177e4SLinus Torvalds */ 3741da177e4SLinus Torvalds int xprt_adjust_timeout(struct rpc_rqst *req) 3751da177e4SLinus Torvalds { 3761da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 3771da177e4SLinus Torvalds struct rpc_timeout *to = &xprt->timeout; 3781da177e4SLinus Torvalds int status = 0; 3791da177e4SLinus Torvalds 3801da177e4SLinus Torvalds if (time_before(jiffies, req->rq_majortimeo)) { 3811da177e4SLinus Torvalds if (to->to_exponential) 3821da177e4SLinus Torvalds req->rq_timeout <<= 1; 3831da177e4SLinus Torvalds else 3841da177e4SLinus Torvalds req->rq_timeout += to->to_increment; 3851da177e4SLinus Torvalds if (to->to_maxval && req->rq_timeout >= to->to_maxval) 3861da177e4SLinus Torvalds req->rq_timeout = to->to_maxval; 3871da177e4SLinus Torvalds req->rq_retries++; 3881da177e4SLinus Torvalds pprintk("RPC: %lu retrans\n", jiffies); 3891da177e4SLinus Torvalds } else { 3901da177e4SLinus Torvalds req->rq_timeout = to->to_initval; 3911da177e4SLinus Torvalds req->rq_retries = 0; 3921da177e4SLinus Torvalds xprt_reset_majortimeo(req); 3931da177e4SLinus Torvalds /* Reset the RTT counters == "slow start" */ 3941da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 3951da177e4SLinus Torvalds rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 3961da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 3971da177e4SLinus Torvalds pprintk("RPC: %lu timeout\n", jiffies); 3981da177e4SLinus Torvalds status = -ETIMEDOUT; 3991da177e4SLinus Torvalds } 4001da177e4SLinus Torvalds 4011da177e4SLinus Torvalds if (req->rq_timeout == 0) { 4021da177e4SLinus Torvalds printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); 4031da177e4SLinus Torvalds req->rq_timeout = 5 * HZ; 4041da177e4SLinus Torvalds } 4051da177e4SLinus Torvalds return status; 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds /* 4091da177e4SLinus Torvalds * Close down a transport socket 4101da177e4SLinus Torvalds */ 4111da177e4SLinus Torvalds static void 4121da177e4SLinus Torvalds xprt_close(struct rpc_xprt *xprt) 4131da177e4SLinus Torvalds { 4141da177e4SLinus Torvalds struct socket *sock = xprt->sock; 4151da177e4SLinus Torvalds struct sock *sk = xprt->inet; 4161da177e4SLinus Torvalds 4171da177e4SLinus Torvalds if (!sk) 4181da177e4SLinus Torvalds return; 4191da177e4SLinus Torvalds 4201da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 4211da177e4SLinus Torvalds xprt->inet = NULL; 4221da177e4SLinus Torvalds xprt->sock = NULL; 4231da177e4SLinus Torvalds 4241da177e4SLinus Torvalds sk->sk_user_data = NULL; 4251da177e4SLinus Torvalds sk->sk_data_ready = xprt->old_data_ready; 4261da177e4SLinus Torvalds sk->sk_state_change = xprt->old_state_change; 4271da177e4SLinus Torvalds sk->sk_write_space = xprt->old_write_space; 4281da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 4291da177e4SLinus Torvalds 4301da177e4SLinus Torvalds sk->sk_no_check = 0; 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds sock_release(sock); 4331da177e4SLinus Torvalds } 4341da177e4SLinus Torvalds 4351da177e4SLinus Torvalds static void 4361da177e4SLinus Torvalds xprt_socket_autoclose(void *args) 4371da177e4SLinus Torvalds { 4381da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)args; 4391da177e4SLinus Torvalds 4401da177e4SLinus Torvalds xprt_disconnect(xprt); 4411da177e4SLinus Torvalds xprt_close(xprt); 4421da177e4SLinus Torvalds xprt_release_write(xprt, NULL); 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds 4451da177e4SLinus Torvalds /* 4461da177e4SLinus Torvalds * Mark a transport as disconnected 4471da177e4SLinus Torvalds */ 4481da177e4SLinus Torvalds static void 4491da177e4SLinus Torvalds xprt_disconnect(struct rpc_xprt *xprt) 4501da177e4SLinus Torvalds { 4511da177e4SLinus Torvalds dprintk("RPC: disconnected transport %p\n", xprt); 4521da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 4531da177e4SLinus Torvalds xprt_clear_connected(xprt); 4541da177e4SLinus Torvalds rpc_wake_up_status(&xprt->pending, -ENOTCONN); 4551da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 4561da177e4SLinus Torvalds } 4571da177e4SLinus Torvalds 4581da177e4SLinus Torvalds /* 4591da177e4SLinus Torvalds * Used to allow disconnection when we've been idle 4601da177e4SLinus Torvalds */ 4611da177e4SLinus Torvalds static void 4621da177e4SLinus Torvalds xprt_init_autodisconnect(unsigned long data) 4631da177e4SLinus Torvalds { 4641da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)data; 4651da177e4SLinus Torvalds 4661da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 4671da177e4SLinus Torvalds if (!list_empty(&xprt->recv) || xprt->shutdown) 4681da177e4SLinus Torvalds goto out_abort; 4691da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 4701da177e4SLinus Torvalds goto out_abort; 4711da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 4721da177e4SLinus Torvalds /* Let keventd close the socket */ 4731da177e4SLinus Torvalds if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) 4741da177e4SLinus Torvalds xprt_release_write(xprt, NULL); 4751da177e4SLinus Torvalds else 4761da177e4SLinus Torvalds schedule_work(&xprt->task_cleanup); 4771da177e4SLinus Torvalds return; 4781da177e4SLinus Torvalds out_abort: 4791da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 4801da177e4SLinus Torvalds } 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds static void xprt_socket_connect(void *args) 4831da177e4SLinus Torvalds { 4841da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)args; 4851da177e4SLinus Torvalds struct socket *sock = xprt->sock; 4861da177e4SLinus Torvalds int status = -EIO; 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds if (xprt->shutdown || xprt->addr.sin_port == 0) 4891da177e4SLinus Torvalds goto out; 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds /* 4921da177e4SLinus Torvalds * Start by resetting any existing state 4931da177e4SLinus Torvalds */ 4941da177e4SLinus Torvalds xprt_close(xprt); 4951da177e4SLinus Torvalds sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); 4961da177e4SLinus Torvalds if (sock == NULL) { 4971da177e4SLinus Torvalds /* couldn't create socket or bind to reserved port; 4981da177e4SLinus Torvalds * this is likely a permanent error, so cause an abort */ 4991da177e4SLinus Torvalds goto out; 5001da177e4SLinus Torvalds } 5011da177e4SLinus Torvalds xprt_bind_socket(xprt, sock); 5021da177e4SLinus Torvalds xprt_sock_setbufsize(xprt); 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds status = 0; 5051da177e4SLinus Torvalds if (!xprt->stream) 5061da177e4SLinus Torvalds goto out; 5071da177e4SLinus Torvalds 5081da177e4SLinus Torvalds /* 5091da177e4SLinus Torvalds * Tell the socket layer to start connecting... 5101da177e4SLinus Torvalds */ 5111da177e4SLinus Torvalds status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, 5121da177e4SLinus Torvalds sizeof(xprt->addr), O_NONBLOCK); 5131da177e4SLinus Torvalds dprintk("RPC: %p connect status %d connected %d sock state %d\n", 5141da177e4SLinus Torvalds xprt, -status, xprt_connected(xprt), sock->sk->sk_state); 5151da177e4SLinus Torvalds if (status < 0) { 5161da177e4SLinus Torvalds switch (status) { 5171da177e4SLinus Torvalds case -EINPROGRESS: 5181da177e4SLinus Torvalds case -EALREADY: 5191da177e4SLinus Torvalds goto out_clear; 5201da177e4SLinus Torvalds } 5211da177e4SLinus Torvalds } 5221da177e4SLinus Torvalds out: 5231da177e4SLinus Torvalds if (status < 0) 5241da177e4SLinus Torvalds rpc_wake_up_status(&xprt->pending, status); 5251da177e4SLinus Torvalds else 5261da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 5271da177e4SLinus Torvalds out_clear: 5281da177e4SLinus Torvalds smp_mb__before_clear_bit(); 5291da177e4SLinus Torvalds clear_bit(XPRT_CONNECTING, &xprt->sockstate); 5301da177e4SLinus Torvalds smp_mb__after_clear_bit(); 5311da177e4SLinus Torvalds } 5321da177e4SLinus Torvalds 5331da177e4SLinus Torvalds /* 5341da177e4SLinus Torvalds * Attempt to connect a TCP socket. 5351da177e4SLinus Torvalds * 5361da177e4SLinus Torvalds */ 5371da177e4SLinus Torvalds void xprt_connect(struct rpc_task *task) 5381da177e4SLinus Torvalds { 5391da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 5401da177e4SLinus Torvalds 5411da177e4SLinus Torvalds dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, 5421da177e4SLinus Torvalds xprt, (xprt_connected(xprt) ? "is" : "is not")); 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds if (xprt->shutdown) { 5451da177e4SLinus Torvalds task->tk_status = -EIO; 5461da177e4SLinus Torvalds return; 5471da177e4SLinus Torvalds } 5481da177e4SLinus Torvalds if (!xprt->addr.sin_port) { 5491da177e4SLinus Torvalds task->tk_status = -EIO; 5501da177e4SLinus Torvalds return; 5511da177e4SLinus Torvalds } 5521da177e4SLinus Torvalds if (!xprt_lock_write(xprt, task)) 5531da177e4SLinus Torvalds return; 5541da177e4SLinus Torvalds if (xprt_connected(xprt)) 5551da177e4SLinus Torvalds goto out_write; 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds if (task->tk_rqstp) 5581da177e4SLinus Torvalds task->tk_rqstp->rq_bytes_sent = 0; 5591da177e4SLinus Torvalds 5601da177e4SLinus Torvalds task->tk_timeout = RPC_CONNECT_TIMEOUT; 5611da177e4SLinus Torvalds rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); 5621da177e4SLinus Torvalds if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { 5631da177e4SLinus Torvalds /* Note: if we are here due to a dropped connection 5641da177e4SLinus Torvalds * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ 5651da177e4SLinus Torvalds * seconds 5661da177e4SLinus Torvalds */ 5671da177e4SLinus Torvalds if (xprt->sock != NULL) 5681da177e4SLinus Torvalds schedule_delayed_work(&xprt->sock_connect, 5691da177e4SLinus Torvalds RPC_REESTABLISH_TIMEOUT); 570ae388462SChuck Lever else { 5711da177e4SLinus Torvalds schedule_work(&xprt->sock_connect); 572ae388462SChuck Lever if (!RPC_IS_ASYNC(task)) 573ae388462SChuck Lever flush_scheduled_work(); 574ae388462SChuck Lever } 5751da177e4SLinus Torvalds } 5761da177e4SLinus Torvalds return; 5771da177e4SLinus Torvalds out_write: 5781da177e4SLinus Torvalds xprt_release_write(xprt, task); 5791da177e4SLinus Torvalds } 5801da177e4SLinus Torvalds 5811da177e4SLinus Torvalds /* 5821da177e4SLinus Torvalds * We arrive here when awoken from waiting on connection establishment. 5831da177e4SLinus Torvalds */ 5841da177e4SLinus Torvalds static void 5851da177e4SLinus Torvalds xprt_connect_status(struct rpc_task *task) 5861da177e4SLinus Torvalds { 5871da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 5881da177e4SLinus Torvalds 5891da177e4SLinus Torvalds if (task->tk_status >= 0) { 5901da177e4SLinus Torvalds dprintk("RPC: %4d xprt_connect_status: connection established\n", 5911da177e4SLinus Torvalds task->tk_pid); 5921da177e4SLinus Torvalds return; 5931da177e4SLinus Torvalds } 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds switch (task->tk_status) { 5961da177e4SLinus Torvalds case -ECONNREFUSED: 5971da177e4SLinus Torvalds case -ECONNRESET: 59823475d66SChuck Lever dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n", 59923475d66SChuck Lever task->tk_pid, task->tk_client->cl_server); 60023475d66SChuck Lever break; 6011da177e4SLinus Torvalds case -ENOTCONN: 60223475d66SChuck Lever dprintk("RPC: %4d xprt_connect_status: connection broken\n", 60323475d66SChuck Lever task->tk_pid); 60423475d66SChuck Lever break; 6051da177e4SLinus Torvalds case -ETIMEDOUT: 60623475d66SChuck Lever dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n", 6071da177e4SLinus Torvalds task->tk_pid); 6081da177e4SLinus Torvalds break; 6091da177e4SLinus Torvalds default: 61023475d66SChuck Lever dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n", 61123475d66SChuck Lever task->tk_pid, -task->tk_status, task->tk_client->cl_server); 6121da177e4SLinus Torvalds xprt_release_write(xprt, task); 61323475d66SChuck Lever task->tk_status = -EIO; 61423475d66SChuck Lever return; 61523475d66SChuck Lever } 61623475d66SChuck Lever 61723475d66SChuck Lever /* if soft mounted, just cause this RPC to fail */ 61823475d66SChuck Lever if (RPC_IS_SOFT(task)) { 61923475d66SChuck Lever xprt_release_write(xprt, task); 62023475d66SChuck Lever task->tk_status = -EIO; 62123475d66SChuck Lever } 6221da177e4SLinus Torvalds } 6231da177e4SLinus Torvalds 6241da177e4SLinus Torvalds /* 6251da177e4SLinus Torvalds * Look up the RPC request corresponding to a reply, and then lock it. 6261da177e4SLinus Torvalds */ 6271da177e4SLinus Torvalds static inline struct rpc_rqst * 6281da177e4SLinus Torvalds xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) 6291da177e4SLinus Torvalds { 6301da177e4SLinus Torvalds struct list_head *pos; 6311da177e4SLinus Torvalds struct rpc_rqst *req = NULL; 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds list_for_each(pos, &xprt->recv) { 6341da177e4SLinus Torvalds struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); 6351da177e4SLinus Torvalds if (entry->rq_xid == xid) { 6361da177e4SLinus Torvalds req = entry; 6371da177e4SLinus Torvalds break; 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds } 6401da177e4SLinus Torvalds return req; 6411da177e4SLinus Torvalds } 6421da177e4SLinus Torvalds 6431da177e4SLinus Torvalds /* 6441da177e4SLinus Torvalds * Complete reply received. 6451da177e4SLinus Torvalds * The TCP code relies on us to remove the request from xprt->pending. 6461da177e4SLinus Torvalds */ 6471da177e4SLinus Torvalds static void 6481da177e4SLinus Torvalds xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) 6491da177e4SLinus Torvalds { 6501da177e4SLinus Torvalds struct rpc_task *task = req->rq_task; 6511da177e4SLinus Torvalds struct rpc_clnt *clnt = task->tk_client; 6521da177e4SLinus Torvalds 6531da177e4SLinus Torvalds /* Adjust congestion window */ 6541da177e4SLinus Torvalds if (!xprt->nocong) { 6551da177e4SLinus Torvalds unsigned timer = task->tk_msg.rpc_proc->p_timer; 6561da177e4SLinus Torvalds xprt_adjust_cwnd(xprt, copied); 6571da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 6581da177e4SLinus Torvalds if (timer) { 6591da177e4SLinus Torvalds if (req->rq_ntrans == 1) 6601da177e4SLinus Torvalds rpc_update_rtt(clnt->cl_rtt, timer, 6611da177e4SLinus Torvalds (long)jiffies - req->rq_xtime); 6621da177e4SLinus Torvalds rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); 6631da177e4SLinus Torvalds } 6641da177e4SLinus Torvalds } 6651da177e4SLinus Torvalds 6661da177e4SLinus Torvalds #ifdef RPC_PROFILE 6671da177e4SLinus Torvalds /* Profile only reads for now */ 6681da177e4SLinus Torvalds if (copied > 1024) { 6691da177e4SLinus Torvalds static unsigned long nextstat; 6701da177e4SLinus Torvalds static unsigned long pkt_rtt, pkt_len, pkt_cnt; 6711da177e4SLinus Torvalds 6721da177e4SLinus Torvalds pkt_cnt++; 6731da177e4SLinus Torvalds pkt_len += req->rq_slen + copied; 6741da177e4SLinus Torvalds pkt_rtt += jiffies - req->rq_xtime; 6751da177e4SLinus Torvalds if (time_before(nextstat, jiffies)) { 6761da177e4SLinus Torvalds printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); 6771da177e4SLinus Torvalds printk("RPC: %ld %ld %ld %ld stat\n", 6781da177e4SLinus Torvalds jiffies, pkt_cnt, pkt_len, pkt_rtt); 6791da177e4SLinus Torvalds pkt_rtt = pkt_len = pkt_cnt = 0; 6801da177e4SLinus Torvalds nextstat = jiffies + 5 * HZ; 6811da177e4SLinus Torvalds } 6821da177e4SLinus Torvalds } 6831da177e4SLinus Torvalds #endif 6841da177e4SLinus Torvalds 6851da177e4SLinus Torvalds dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); 6861da177e4SLinus Torvalds list_del_init(&req->rq_list); 6871da177e4SLinus Torvalds req->rq_received = req->rq_private_buf.len = copied; 6881da177e4SLinus Torvalds 6891da177e4SLinus Torvalds /* ... and wake up the process. */ 6901da177e4SLinus Torvalds rpc_wake_up_task(task); 6911da177e4SLinus Torvalds return; 6921da177e4SLinus Torvalds } 6931da177e4SLinus Torvalds 6941da177e4SLinus Torvalds static size_t 6951da177e4SLinus Torvalds skb_read_bits(skb_reader_t *desc, void *to, size_t len) 6961da177e4SLinus Torvalds { 6971da177e4SLinus Torvalds if (len > desc->count) 6981da177e4SLinus Torvalds len = desc->count; 6991da177e4SLinus Torvalds if (skb_copy_bits(desc->skb, desc->offset, to, len)) 7001da177e4SLinus Torvalds return 0; 7011da177e4SLinus Torvalds desc->count -= len; 7021da177e4SLinus Torvalds desc->offset += len; 7031da177e4SLinus Torvalds return len; 7041da177e4SLinus Torvalds } 7051da177e4SLinus Torvalds 7061da177e4SLinus Torvalds static size_t 7071da177e4SLinus Torvalds skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) 7081da177e4SLinus Torvalds { 7091da177e4SLinus Torvalds unsigned int csum2, pos; 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds if (len > desc->count) 7121da177e4SLinus Torvalds len = desc->count; 7131da177e4SLinus Torvalds pos = desc->offset; 7141da177e4SLinus Torvalds csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); 7151da177e4SLinus Torvalds desc->csum = csum_block_add(desc->csum, csum2, pos); 7161da177e4SLinus Torvalds desc->count -= len; 7171da177e4SLinus Torvalds desc->offset += len; 7181da177e4SLinus Torvalds return len; 7191da177e4SLinus Torvalds } 7201da177e4SLinus Torvalds 7211da177e4SLinus Torvalds /* 7221da177e4SLinus Torvalds * We have set things up such that we perform the checksum of the UDP 7231da177e4SLinus Torvalds * packet in parallel with the copies into the RPC client iovec. -DaveM 7241da177e4SLinus Torvalds */ 7251da177e4SLinus Torvalds int 7261da177e4SLinus Torvalds csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) 7271da177e4SLinus Torvalds { 7281da177e4SLinus Torvalds skb_reader_t desc; 7291da177e4SLinus Torvalds 7301da177e4SLinus Torvalds desc.skb = skb; 7311da177e4SLinus Torvalds desc.offset = sizeof(struct udphdr); 7321da177e4SLinus Torvalds desc.count = skb->len - desc.offset; 7331da177e4SLinus Torvalds 7341da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_UNNECESSARY) 7351da177e4SLinus Torvalds goto no_checksum; 7361da177e4SLinus Torvalds 7371da177e4SLinus Torvalds desc.csum = csum_partial(skb->data, desc.offset, skb->csum); 738e053d1abSOlaf Kirch if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) 739e053d1abSOlaf Kirch return -1; 7401da177e4SLinus Torvalds if (desc.offset != skb->len) { 7411da177e4SLinus Torvalds unsigned int csum2; 7421da177e4SLinus Torvalds csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); 7431da177e4SLinus Torvalds desc.csum = csum_block_add(desc.csum, csum2, desc.offset); 7441da177e4SLinus Torvalds } 7451da177e4SLinus Torvalds if (desc.count) 7461da177e4SLinus Torvalds return -1; 7471da177e4SLinus Torvalds if ((unsigned short)csum_fold(desc.csum)) 7481da177e4SLinus Torvalds return -1; 7491da177e4SLinus Torvalds return 0; 7501da177e4SLinus Torvalds no_checksum: 751e053d1abSOlaf Kirch if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) 752e053d1abSOlaf Kirch return -1; 7531da177e4SLinus Torvalds if (desc.count) 7541da177e4SLinus Torvalds return -1; 7551da177e4SLinus Torvalds return 0; 7561da177e4SLinus Torvalds } 7571da177e4SLinus Torvalds 7581da177e4SLinus Torvalds /* 7591da177e4SLinus Torvalds * Input handler for RPC replies. Called from a bottom half and hence 7601da177e4SLinus Torvalds * atomic. 7611da177e4SLinus Torvalds */ 7621da177e4SLinus Torvalds static void 7631da177e4SLinus Torvalds udp_data_ready(struct sock *sk, int len) 7641da177e4SLinus Torvalds { 7651da177e4SLinus Torvalds struct rpc_task *task; 7661da177e4SLinus Torvalds struct rpc_xprt *xprt; 7671da177e4SLinus Torvalds struct rpc_rqst *rovr; 7681da177e4SLinus Torvalds struct sk_buff *skb; 7691da177e4SLinus Torvalds int err, repsize, copied; 7701da177e4SLinus Torvalds u32 _xid, *xp; 7711da177e4SLinus Torvalds 7721da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 7731da177e4SLinus Torvalds dprintk("RPC: udp_data_ready...\n"); 7741da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) { 7751da177e4SLinus Torvalds printk("RPC: udp_data_ready request not found!\n"); 7761da177e4SLinus Torvalds goto out; 7771da177e4SLinus Torvalds } 7781da177e4SLinus Torvalds 7791da177e4SLinus Torvalds dprintk("RPC: udp_data_ready client %p\n", xprt); 7801da177e4SLinus Torvalds 7811da177e4SLinus Torvalds if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) 7821da177e4SLinus Torvalds goto out; 7831da177e4SLinus Torvalds 7841da177e4SLinus Torvalds if (xprt->shutdown) 7851da177e4SLinus Torvalds goto dropit; 7861da177e4SLinus Torvalds 7871da177e4SLinus Torvalds repsize = skb->len - sizeof(struct udphdr); 7881da177e4SLinus Torvalds if (repsize < 4) { 7891da177e4SLinus Torvalds printk("RPC: impossible RPC reply size %d!\n", repsize); 7901da177e4SLinus Torvalds goto dropit; 7911da177e4SLinus Torvalds } 7921da177e4SLinus Torvalds 7931da177e4SLinus Torvalds /* Copy the XID from the skb... */ 7941da177e4SLinus Torvalds xp = skb_header_pointer(skb, sizeof(struct udphdr), 7951da177e4SLinus Torvalds sizeof(_xid), &_xid); 7961da177e4SLinus Torvalds if (xp == NULL) 7971da177e4SLinus Torvalds goto dropit; 7981da177e4SLinus Torvalds 7991da177e4SLinus Torvalds /* Look up and lock the request corresponding to the given XID */ 8001da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 8011da177e4SLinus Torvalds rovr = xprt_lookup_rqst(xprt, *xp); 8021da177e4SLinus Torvalds if (!rovr) 8031da177e4SLinus Torvalds goto out_unlock; 8041da177e4SLinus Torvalds task = rovr->rq_task; 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds dprintk("RPC: %4d received reply\n", task->tk_pid); 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds if ((copied = rovr->rq_private_buf.buflen) > repsize) 8091da177e4SLinus Torvalds copied = repsize; 8101da177e4SLinus Torvalds 8111da177e4SLinus Torvalds /* Suck it into the iovec, verify checksum if not done by hw. */ 8121da177e4SLinus Torvalds if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) 8131da177e4SLinus Torvalds goto out_unlock; 8141da177e4SLinus Torvalds 8151da177e4SLinus Torvalds /* Something worked... */ 8161da177e4SLinus Torvalds dst_confirm(skb->dst); 8171da177e4SLinus Torvalds 8181da177e4SLinus Torvalds xprt_complete_rqst(xprt, rovr, copied); 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds out_unlock: 8211da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 8221da177e4SLinus Torvalds dropit: 8231da177e4SLinus Torvalds skb_free_datagram(sk, skb); 8241da177e4SLinus Torvalds out: 8251da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 8261da177e4SLinus Torvalds } 8271da177e4SLinus Torvalds 8281da177e4SLinus Torvalds /* 8291da177e4SLinus Torvalds * Copy from an skb into memory and shrink the skb. 8301da177e4SLinus Torvalds */ 8311da177e4SLinus Torvalds static inline size_t 8321da177e4SLinus Torvalds tcp_copy_data(skb_reader_t *desc, void *p, size_t len) 8331da177e4SLinus Torvalds { 8341da177e4SLinus Torvalds if (len > desc->count) 8351da177e4SLinus Torvalds len = desc->count; 8367e06b53dSTrond Myklebust if (skb_copy_bits(desc->skb, desc->offset, p, len)) { 8377e06b53dSTrond Myklebust dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", 8387e06b53dSTrond Myklebust len, desc->count); 8391da177e4SLinus Torvalds return 0; 8407e06b53dSTrond Myklebust } 8411da177e4SLinus Torvalds desc->offset += len; 8421da177e4SLinus Torvalds desc->count -= len; 8437e06b53dSTrond Myklebust dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", 8447e06b53dSTrond Myklebust len, desc->count); 8451da177e4SLinus Torvalds return len; 8461da177e4SLinus Torvalds } 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds /* 8491da177e4SLinus Torvalds * TCP read fragment marker 8501da177e4SLinus Torvalds */ 8511da177e4SLinus Torvalds static inline void 8521da177e4SLinus Torvalds tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) 8531da177e4SLinus Torvalds { 8541da177e4SLinus Torvalds size_t len, used; 8551da177e4SLinus Torvalds char *p; 8561da177e4SLinus Torvalds 8571da177e4SLinus Torvalds p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; 8581da177e4SLinus Torvalds len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; 8591da177e4SLinus Torvalds used = tcp_copy_data(desc, p, len); 8601da177e4SLinus Torvalds xprt->tcp_offset += used; 8611da177e4SLinus Torvalds if (used != len) 8621da177e4SLinus Torvalds return; 8631da177e4SLinus Torvalds xprt->tcp_reclen = ntohl(xprt->tcp_recm); 8641da177e4SLinus Torvalds if (xprt->tcp_reclen & 0x80000000) 8651da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_LAST_FRAG; 8661da177e4SLinus Torvalds else 8671da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_LAST_FRAG; 8681da177e4SLinus Torvalds xprt->tcp_reclen &= 0x7fffffff; 8691da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_RECM; 8701da177e4SLinus Torvalds xprt->tcp_offset = 0; 8711da177e4SLinus Torvalds /* Sanity check of the record length */ 8721da177e4SLinus Torvalds if (xprt->tcp_reclen < 4) { 8731da177e4SLinus Torvalds printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); 8741da177e4SLinus Torvalds xprt_disconnect(xprt); 8751da177e4SLinus Torvalds } 8761da177e4SLinus Torvalds dprintk("RPC: reading TCP record fragment of length %d\n", 8771da177e4SLinus Torvalds xprt->tcp_reclen); 8781da177e4SLinus Torvalds } 8791da177e4SLinus Torvalds 8801da177e4SLinus Torvalds static void 8811da177e4SLinus Torvalds tcp_check_recm(struct rpc_xprt *xprt) 8821da177e4SLinus Torvalds { 8837e06b53dSTrond Myklebust dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", 8847e06b53dSTrond Myklebust xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); 8851da177e4SLinus Torvalds if (xprt->tcp_offset == xprt->tcp_reclen) { 8861da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_RECM; 8871da177e4SLinus Torvalds xprt->tcp_offset = 0; 8881da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_LAST_FRAG) { 8891da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 8901da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_XID; 8911da177e4SLinus Torvalds xprt->tcp_copied = 0; 8921da177e4SLinus Torvalds } 8931da177e4SLinus Torvalds } 8941da177e4SLinus Torvalds } 8951da177e4SLinus Torvalds 8961da177e4SLinus Torvalds /* 8971da177e4SLinus Torvalds * TCP read xid 8981da177e4SLinus Torvalds */ 8991da177e4SLinus Torvalds static inline void 9001da177e4SLinus Torvalds tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) 9011da177e4SLinus Torvalds { 9021da177e4SLinus Torvalds size_t len, used; 9031da177e4SLinus Torvalds char *p; 9041da177e4SLinus Torvalds 9051da177e4SLinus Torvalds len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; 9061da177e4SLinus Torvalds dprintk("RPC: reading XID (%Zu bytes)\n", len); 9071da177e4SLinus Torvalds p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; 9081da177e4SLinus Torvalds used = tcp_copy_data(desc, p, len); 9091da177e4SLinus Torvalds xprt->tcp_offset += used; 9101da177e4SLinus Torvalds if (used != len) 9111da177e4SLinus Torvalds return; 9121da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_XID; 9131da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_DATA; 9141da177e4SLinus Torvalds xprt->tcp_copied = 4; 9151da177e4SLinus Torvalds dprintk("RPC: reading reply for XID %08x\n", 9161da177e4SLinus Torvalds ntohl(xprt->tcp_xid)); 9171da177e4SLinus Torvalds tcp_check_recm(xprt); 9181da177e4SLinus Torvalds } 9191da177e4SLinus Torvalds 9201da177e4SLinus Torvalds /* 9211da177e4SLinus Torvalds * TCP read and complete request 9221da177e4SLinus Torvalds */ 9231da177e4SLinus Torvalds static inline void 9241da177e4SLinus Torvalds tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) 9251da177e4SLinus Torvalds { 9261da177e4SLinus Torvalds struct rpc_rqst *req; 9271da177e4SLinus Torvalds struct xdr_buf *rcvbuf; 9281da177e4SLinus Torvalds size_t len; 9297e06b53dSTrond Myklebust ssize_t r; 9301da177e4SLinus Torvalds 9311da177e4SLinus Torvalds /* Find and lock the request corresponding to this xid */ 9321da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 9331da177e4SLinus Torvalds req = xprt_lookup_rqst(xprt, xprt->tcp_xid); 9341da177e4SLinus Torvalds if (!req) { 9351da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9361da177e4SLinus Torvalds dprintk("RPC: XID %08x request not found!\n", 9371da177e4SLinus Torvalds ntohl(xprt->tcp_xid)); 9381da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 9391da177e4SLinus Torvalds return; 9401da177e4SLinus Torvalds } 9411da177e4SLinus Torvalds 9421da177e4SLinus Torvalds rcvbuf = &req->rq_private_buf; 9431da177e4SLinus Torvalds len = desc->count; 9441da177e4SLinus Torvalds if (len > xprt->tcp_reclen - xprt->tcp_offset) { 9451da177e4SLinus Torvalds skb_reader_t my_desc; 9461da177e4SLinus Torvalds 9471da177e4SLinus Torvalds len = xprt->tcp_reclen - xprt->tcp_offset; 9481da177e4SLinus Torvalds memcpy(&my_desc, desc, sizeof(my_desc)); 9491da177e4SLinus Torvalds my_desc.count = len; 950e053d1abSOlaf Kirch r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 9511da177e4SLinus Torvalds &my_desc, tcp_copy_data); 9527e06b53dSTrond Myklebust desc->count -= r; 9537e06b53dSTrond Myklebust desc->offset += r; 9541da177e4SLinus Torvalds } else 955e053d1abSOlaf Kirch r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 9561da177e4SLinus Torvalds desc, tcp_copy_data); 9571da177e4SLinus Torvalds 9587e06b53dSTrond Myklebust if (r > 0) { 9597e06b53dSTrond Myklebust xprt->tcp_copied += r; 9607e06b53dSTrond Myklebust xprt->tcp_offset += r; 9617e06b53dSTrond Myklebust } 9627e06b53dSTrond Myklebust if (r != len) { 963e053d1abSOlaf Kirch /* Error when copying to the receive buffer, 964e053d1abSOlaf Kirch * usually because we weren't able to allocate 965e053d1abSOlaf Kirch * additional buffer pages. All we can do now 966e053d1abSOlaf Kirch * is turn off XPRT_COPY_DATA, so the request 967e053d1abSOlaf Kirch * will not receive any additional updates, 968e053d1abSOlaf Kirch * and time out. 969e053d1abSOlaf Kirch * Any remaining data from this record will 970e053d1abSOlaf Kirch * be discarded. 971e053d1abSOlaf Kirch */ 972e053d1abSOlaf Kirch xprt->tcp_flags &= ~XPRT_COPY_DATA; 9737e06b53dSTrond Myklebust dprintk("RPC: XID %08x truncated request\n", 9747e06b53dSTrond Myklebust ntohl(xprt->tcp_xid)); 9757e06b53dSTrond Myklebust dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", 9767e06b53dSTrond Myklebust xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); 977e053d1abSOlaf Kirch goto out; 978e053d1abSOlaf Kirch } 979e053d1abSOlaf Kirch 980c54d7e03SDavid S. Miller dprintk("RPC: XID %08x read %Zd bytes\n", 9817e06b53dSTrond Myklebust ntohl(xprt->tcp_xid), r); 9827e06b53dSTrond Myklebust dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", 9837e06b53dSTrond Myklebust xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); 9847e06b53dSTrond Myklebust 9851da177e4SLinus Torvalds if (xprt->tcp_copied == req->rq_private_buf.buflen) 9861da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9871da177e4SLinus Torvalds else if (xprt->tcp_offset == xprt->tcp_reclen) { 9881da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_LAST_FRAG) 9891da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9901da177e4SLinus Torvalds } 9911da177e4SLinus Torvalds 9927e06b53dSTrond Myklebust out: 9931da177e4SLinus Torvalds if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { 9941da177e4SLinus Torvalds dprintk("RPC: %4d received reply complete\n", 9951da177e4SLinus Torvalds req->rq_task->tk_pid); 9961da177e4SLinus Torvalds xprt_complete_rqst(xprt, req, xprt->tcp_copied); 9971da177e4SLinus Torvalds } 9981da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 9991da177e4SLinus Torvalds tcp_check_recm(xprt); 10001da177e4SLinus Torvalds } 10011da177e4SLinus Torvalds 10021da177e4SLinus Torvalds /* 10031da177e4SLinus Torvalds * TCP discard extra bytes from a short read 10041da177e4SLinus Torvalds */ 10051da177e4SLinus Torvalds static inline void 10061da177e4SLinus Torvalds tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) 10071da177e4SLinus Torvalds { 10081da177e4SLinus Torvalds size_t len; 10091da177e4SLinus Torvalds 10101da177e4SLinus Torvalds len = xprt->tcp_reclen - xprt->tcp_offset; 10111da177e4SLinus Torvalds if (len > desc->count) 10121da177e4SLinus Torvalds len = desc->count; 10131da177e4SLinus Torvalds desc->count -= len; 10141da177e4SLinus Torvalds desc->offset += len; 10151da177e4SLinus Torvalds xprt->tcp_offset += len; 1016c54d7e03SDavid S. Miller dprintk("RPC: discarded %Zu bytes\n", len); 10171da177e4SLinus Torvalds tcp_check_recm(xprt); 10181da177e4SLinus Torvalds } 10191da177e4SLinus Torvalds 10201da177e4SLinus Torvalds /* 10211da177e4SLinus Torvalds * TCP record receive routine 10221da177e4SLinus Torvalds * We first have to grab the record marker, then the XID, then the data. 10231da177e4SLinus Torvalds */ 10241da177e4SLinus Torvalds static int 10251da177e4SLinus Torvalds tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, 10261da177e4SLinus Torvalds unsigned int offset, size_t len) 10271da177e4SLinus Torvalds { 10281da177e4SLinus Torvalds struct rpc_xprt *xprt = rd_desc->arg.data; 10291da177e4SLinus Torvalds skb_reader_t desc = { 10301da177e4SLinus Torvalds .skb = skb, 10311da177e4SLinus Torvalds .offset = offset, 10321da177e4SLinus Torvalds .count = len, 10331da177e4SLinus Torvalds .csum = 0 10341da177e4SLinus Torvalds }; 10351da177e4SLinus Torvalds 10361da177e4SLinus Torvalds dprintk("RPC: tcp_data_recv\n"); 10371da177e4SLinus Torvalds do { 10381da177e4SLinus Torvalds /* Read in a new fragment marker if necessary */ 10391da177e4SLinus Torvalds /* Can we ever really expect to get completely empty fragments? */ 10401da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_RECM) { 10411da177e4SLinus Torvalds tcp_read_fraghdr(xprt, &desc); 10421da177e4SLinus Torvalds continue; 10431da177e4SLinus Torvalds } 10441da177e4SLinus Torvalds /* Read in the xid if necessary */ 10451da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_XID) { 10461da177e4SLinus Torvalds tcp_read_xid(xprt, &desc); 10471da177e4SLinus Torvalds continue; 10481da177e4SLinus Torvalds } 10491da177e4SLinus Torvalds /* Read in the request data */ 10501da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_DATA) { 10511da177e4SLinus Torvalds tcp_read_request(xprt, &desc); 10521da177e4SLinus Torvalds continue; 10531da177e4SLinus Torvalds } 10541da177e4SLinus Torvalds /* Skip over any trailing bytes on short reads */ 10551da177e4SLinus Torvalds tcp_read_discard(xprt, &desc); 10561da177e4SLinus Torvalds } while (desc.count); 10571da177e4SLinus Torvalds dprintk("RPC: tcp_data_recv done\n"); 10581da177e4SLinus Torvalds return len - desc.count; 10591da177e4SLinus Torvalds } 10601da177e4SLinus Torvalds 10611da177e4SLinus Torvalds static void tcp_data_ready(struct sock *sk, int bytes) 10621da177e4SLinus Torvalds { 10631da177e4SLinus Torvalds struct rpc_xprt *xprt; 10641da177e4SLinus Torvalds read_descriptor_t rd_desc; 10651da177e4SLinus Torvalds 10661da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 10671da177e4SLinus Torvalds dprintk("RPC: tcp_data_ready...\n"); 10681da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) { 10691da177e4SLinus Torvalds printk("RPC: tcp_data_ready socket info not found!\n"); 10701da177e4SLinus Torvalds goto out; 10711da177e4SLinus Torvalds } 10721da177e4SLinus Torvalds if (xprt->shutdown) 10731da177e4SLinus Torvalds goto out; 10741da177e4SLinus Torvalds 10751da177e4SLinus Torvalds /* We use rd_desc to pass struct xprt to tcp_data_recv */ 10761da177e4SLinus Torvalds rd_desc.arg.data = xprt; 10771da177e4SLinus Torvalds rd_desc.count = 65536; 10781da177e4SLinus Torvalds tcp_read_sock(sk, &rd_desc, tcp_data_recv); 10791da177e4SLinus Torvalds out: 10801da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 10811da177e4SLinus Torvalds } 10821da177e4SLinus Torvalds 10831da177e4SLinus Torvalds static void 10841da177e4SLinus Torvalds tcp_state_change(struct sock *sk) 10851da177e4SLinus Torvalds { 10861da177e4SLinus Torvalds struct rpc_xprt *xprt; 10871da177e4SLinus Torvalds 10881da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 10891da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) 10901da177e4SLinus Torvalds goto out; 10911da177e4SLinus Torvalds dprintk("RPC: tcp_state_change client %p...\n", xprt); 10921da177e4SLinus Torvalds dprintk("RPC: state %x conn %d dead %d zapped %d\n", 10931da177e4SLinus Torvalds sk->sk_state, xprt_connected(xprt), 10941da177e4SLinus Torvalds sock_flag(sk, SOCK_DEAD), 10951da177e4SLinus Torvalds sock_flag(sk, SOCK_ZAPPED)); 10961da177e4SLinus Torvalds 10971da177e4SLinus Torvalds switch (sk->sk_state) { 10981da177e4SLinus Torvalds case TCP_ESTABLISHED: 10991da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 11001da177e4SLinus Torvalds if (!xprt_test_and_set_connected(xprt)) { 11011da177e4SLinus Torvalds /* Reset TCP record info */ 11021da177e4SLinus Torvalds xprt->tcp_offset = 0; 11031da177e4SLinus Torvalds xprt->tcp_reclen = 0; 11041da177e4SLinus Torvalds xprt->tcp_copied = 0; 11051da177e4SLinus Torvalds xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; 11061da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 11071da177e4SLinus Torvalds } 11081da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 11091da177e4SLinus Torvalds break; 11101da177e4SLinus Torvalds case TCP_SYN_SENT: 11111da177e4SLinus Torvalds case TCP_SYN_RECV: 11121da177e4SLinus Torvalds break; 11131da177e4SLinus Torvalds default: 111420e5ac82SChuck Lever xprt_disconnect(xprt); 11151da177e4SLinus Torvalds break; 11161da177e4SLinus Torvalds } 11171da177e4SLinus Torvalds out: 11181da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 11191da177e4SLinus Torvalds } 11201da177e4SLinus Torvalds 11211da177e4SLinus Torvalds /* 11221da177e4SLinus Torvalds * Called when more output buffer space is available for this socket. 11231da177e4SLinus Torvalds * We try not to wake our writers until they can make "significant" 11241da177e4SLinus Torvalds * progress, otherwise we'll waste resources thrashing sock_sendmsg 11251da177e4SLinus Torvalds * with a bunch of small requests. 11261da177e4SLinus Torvalds */ 11271da177e4SLinus Torvalds static void 11281da177e4SLinus Torvalds xprt_write_space(struct sock *sk) 11291da177e4SLinus Torvalds { 11301da177e4SLinus Torvalds struct rpc_xprt *xprt; 11311da177e4SLinus Torvalds struct socket *sock; 11321da177e4SLinus Torvalds 11331da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 11341da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) 11351da177e4SLinus Torvalds goto out; 11361da177e4SLinus Torvalds if (xprt->shutdown) 11371da177e4SLinus Torvalds goto out; 11381da177e4SLinus Torvalds 11391da177e4SLinus Torvalds /* Wait until we have enough socket memory */ 11401da177e4SLinus Torvalds if (xprt->stream) { 11411da177e4SLinus Torvalds /* from net/core/stream.c:sk_stream_write_space */ 11421da177e4SLinus Torvalds if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) 11431da177e4SLinus Torvalds goto out; 11441da177e4SLinus Torvalds } else { 11451da177e4SLinus Torvalds /* from net/core/sock.c:sock_def_write_space */ 11461da177e4SLinus Torvalds if (!sock_writeable(sk)) 11471da177e4SLinus Torvalds goto out; 11481da177e4SLinus Torvalds } 11491da177e4SLinus Torvalds 11501da177e4SLinus Torvalds if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) 11511da177e4SLinus Torvalds goto out; 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 11541da177e4SLinus Torvalds if (xprt->snd_task) 11551da177e4SLinus Torvalds rpc_wake_up_task(xprt->snd_task); 11561da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 11571da177e4SLinus Torvalds out: 11581da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 11591da177e4SLinus Torvalds } 11601da177e4SLinus Torvalds 11611da177e4SLinus Torvalds /* 11621da177e4SLinus Torvalds * RPC receive timeout handler. 11631da177e4SLinus Torvalds */ 11641da177e4SLinus Torvalds static void 11651da177e4SLinus Torvalds xprt_timer(struct rpc_task *task) 11661da177e4SLinus Torvalds { 11671da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 11681da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 11691da177e4SLinus Torvalds 11701da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 11711da177e4SLinus Torvalds if (req->rq_received) 11721da177e4SLinus Torvalds goto out; 11731da177e4SLinus Torvalds 11741da177e4SLinus Torvalds xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); 11751da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 11761da177e4SLinus Torvalds 11771da177e4SLinus Torvalds dprintk("RPC: %4d xprt_timer (%s request)\n", 11781da177e4SLinus Torvalds task->tk_pid, req ? "pending" : "backlogged"); 11791da177e4SLinus Torvalds 11801da177e4SLinus Torvalds task->tk_status = -ETIMEDOUT; 11811da177e4SLinus Torvalds out: 11821da177e4SLinus Torvalds task->tk_timeout = 0; 11831da177e4SLinus Torvalds rpc_wake_up_task(task); 11841da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 11851da177e4SLinus Torvalds } 11861da177e4SLinus Torvalds 11871da177e4SLinus Torvalds /* 11881da177e4SLinus Torvalds * Place the actual RPC call. 11891da177e4SLinus Torvalds * We have to copy the iovec because sendmsg fiddles with its contents. 11901da177e4SLinus Torvalds */ 11911da177e4SLinus Torvalds int 11921da177e4SLinus Torvalds xprt_prepare_transmit(struct rpc_task *task) 11931da177e4SLinus Torvalds { 11941da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 11951da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 11961da177e4SLinus Torvalds int err = 0; 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); 11991da177e4SLinus Torvalds 12001da177e4SLinus Torvalds if (xprt->shutdown) 12011da177e4SLinus Torvalds return -EIO; 12021da177e4SLinus Torvalds 12031da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 12041da177e4SLinus Torvalds if (req->rq_received && !req->rq_bytes_sent) { 12051da177e4SLinus Torvalds err = req->rq_received; 12061da177e4SLinus Torvalds goto out_unlock; 12071da177e4SLinus Torvalds } 12081da177e4SLinus Torvalds if (!__xprt_lock_write(xprt, task)) { 12091da177e4SLinus Torvalds err = -EAGAIN; 12101da177e4SLinus Torvalds goto out_unlock; 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds 12131da177e4SLinus Torvalds if (!xprt_connected(xprt)) { 12141da177e4SLinus Torvalds err = -ENOTCONN; 12151da177e4SLinus Torvalds goto out_unlock; 12161da177e4SLinus Torvalds } 12171da177e4SLinus Torvalds out_unlock: 12181da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 12191da177e4SLinus Torvalds return err; 12201da177e4SLinus Torvalds } 12211da177e4SLinus Torvalds 12221da177e4SLinus Torvalds void 12231da177e4SLinus Torvalds xprt_transmit(struct rpc_task *task) 12241da177e4SLinus Torvalds { 12251da177e4SLinus Torvalds struct rpc_clnt *clnt = task->tk_client; 12261da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 12271da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 12281da177e4SLinus Torvalds int status, retry = 0; 12291da177e4SLinus Torvalds 12301da177e4SLinus Torvalds 12311da177e4SLinus Torvalds dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds /* set up everything as needed. */ 12341da177e4SLinus Torvalds /* Write the record marker */ 12351da177e4SLinus Torvalds if (xprt->stream) { 12361da177e4SLinus Torvalds u32 *marker = req->rq_svec[0].iov_base; 12371da177e4SLinus Torvalds 12381da177e4SLinus Torvalds *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); 12391da177e4SLinus Torvalds } 12401da177e4SLinus Torvalds 12411da177e4SLinus Torvalds smp_rmb(); 12421da177e4SLinus Torvalds if (!req->rq_received) { 12431da177e4SLinus Torvalds if (list_empty(&req->rq_list)) { 12441da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 12451da177e4SLinus Torvalds /* Update the softirq receive buffer */ 12461da177e4SLinus Torvalds memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 12471da177e4SLinus Torvalds sizeof(req->rq_private_buf)); 12481da177e4SLinus Torvalds /* Add request to the receive list */ 12491da177e4SLinus Torvalds list_add_tail(&req->rq_list, &xprt->recv); 12501da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 12511da177e4SLinus Torvalds xprt_reset_majortimeo(req); 12520f9dc2b1STrond Myklebust /* Turn off autodisconnect */ 12530f9dc2b1STrond Myklebust del_singleshot_timer_sync(&xprt->timer); 12541da177e4SLinus Torvalds } 12551da177e4SLinus Torvalds } else if (!req->rq_bytes_sent) 12561da177e4SLinus Torvalds return; 12571da177e4SLinus Torvalds 12581da177e4SLinus Torvalds /* Continue transmitting the packet/record. We must be careful 12591da177e4SLinus Torvalds * to cope with writespace callbacks arriving _after_ we have 12601da177e4SLinus Torvalds * called xprt_sendmsg(). 12611da177e4SLinus Torvalds */ 12621da177e4SLinus Torvalds while (1) { 12631da177e4SLinus Torvalds req->rq_xtime = jiffies; 12641da177e4SLinus Torvalds status = xprt_sendmsg(xprt, req); 12651da177e4SLinus Torvalds 12661da177e4SLinus Torvalds if (status < 0) 12671da177e4SLinus Torvalds break; 12681da177e4SLinus Torvalds 12691da177e4SLinus Torvalds if (xprt->stream) { 12701da177e4SLinus Torvalds req->rq_bytes_sent += status; 12711da177e4SLinus Torvalds 12721da177e4SLinus Torvalds /* If we've sent the entire packet, immediately 12731da177e4SLinus Torvalds * reset the count of bytes sent. */ 12741da177e4SLinus Torvalds if (req->rq_bytes_sent >= req->rq_slen) { 12751da177e4SLinus Torvalds req->rq_bytes_sent = 0; 12761da177e4SLinus Torvalds goto out_receive; 12771da177e4SLinus Torvalds } 12781da177e4SLinus Torvalds } else { 12791da177e4SLinus Torvalds if (status >= req->rq_slen) 12801da177e4SLinus Torvalds goto out_receive; 12811da177e4SLinus Torvalds status = -EAGAIN; 12821da177e4SLinus Torvalds break; 12831da177e4SLinus Torvalds } 12841da177e4SLinus Torvalds 12851da177e4SLinus Torvalds dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", 12861da177e4SLinus Torvalds task->tk_pid, req->rq_slen - req->rq_bytes_sent, 12871da177e4SLinus Torvalds req->rq_slen); 12881da177e4SLinus Torvalds 12891da177e4SLinus Torvalds status = -EAGAIN; 12901da177e4SLinus Torvalds if (retry++ > 50) 12911da177e4SLinus Torvalds break; 12921da177e4SLinus Torvalds } 12931da177e4SLinus Torvalds 12941da177e4SLinus Torvalds /* Note: at this point, task->tk_sleeping has not yet been set, 12951da177e4SLinus Torvalds * hence there is no danger of the waking up task being put on 12961da177e4SLinus Torvalds * schedq, and being picked up by a parallel run of rpciod(). 12971da177e4SLinus Torvalds */ 12981da177e4SLinus Torvalds task->tk_status = status; 12991da177e4SLinus Torvalds 13001da177e4SLinus Torvalds switch (status) { 13011da177e4SLinus Torvalds case -EAGAIN: 13021da177e4SLinus Torvalds if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { 13031da177e4SLinus Torvalds /* Protect against races with xprt_write_space */ 13041da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 13051da177e4SLinus Torvalds /* Don't race with disconnect */ 13061da177e4SLinus Torvalds if (!xprt_connected(xprt)) 13071da177e4SLinus Torvalds task->tk_status = -ENOTCONN; 13081da177e4SLinus Torvalds else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { 13091da177e4SLinus Torvalds task->tk_timeout = req->rq_timeout; 13101da177e4SLinus Torvalds rpc_sleep_on(&xprt->pending, task, NULL, NULL); 13111da177e4SLinus Torvalds } 13121da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 13131da177e4SLinus Torvalds return; 13141da177e4SLinus Torvalds } 13151da177e4SLinus Torvalds /* Keep holding the socket if it is blocked */ 13161da177e4SLinus Torvalds rpc_delay(task, HZ>>4); 13171da177e4SLinus Torvalds return; 13181da177e4SLinus Torvalds case -ECONNREFUSED: 13191da177e4SLinus Torvalds task->tk_timeout = RPC_REESTABLISH_TIMEOUT; 13201da177e4SLinus Torvalds rpc_sleep_on(&xprt->sending, task, NULL, NULL); 13211da177e4SLinus Torvalds case -ENOTCONN: 13221da177e4SLinus Torvalds return; 13231da177e4SLinus Torvalds default: 13241da177e4SLinus Torvalds if (xprt->stream) 13251da177e4SLinus Torvalds xprt_disconnect(xprt); 13261da177e4SLinus Torvalds } 13271da177e4SLinus Torvalds xprt_release_write(xprt, task); 13281da177e4SLinus Torvalds return; 13291da177e4SLinus Torvalds out_receive: 13301da177e4SLinus Torvalds dprintk("RPC: %4d xmit complete\n", task->tk_pid); 13311da177e4SLinus Torvalds /* Set the task's receive timeout value */ 13321da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 13331da177e4SLinus Torvalds if (!xprt->nocong) { 13341da177e4SLinus Torvalds int timer = task->tk_msg.rpc_proc->p_timer; 13351da177e4SLinus Torvalds task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); 13361da177e4SLinus Torvalds task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; 13371da177e4SLinus Torvalds if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) 13381da177e4SLinus Torvalds task->tk_timeout = xprt->timeout.to_maxval; 13391da177e4SLinus Torvalds } else 13401da177e4SLinus Torvalds task->tk_timeout = req->rq_timeout; 13411da177e4SLinus Torvalds /* Don't race with disconnect */ 13421da177e4SLinus Torvalds if (!xprt_connected(xprt)) 13431da177e4SLinus Torvalds task->tk_status = -ENOTCONN; 13441da177e4SLinus Torvalds else if (!req->rq_received) 13451da177e4SLinus Torvalds rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); 13461da177e4SLinus Torvalds __xprt_release_write(xprt, task); 13471da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds 13501da177e4SLinus Torvalds /* 13511da177e4SLinus Torvalds * Reserve an RPC call slot. 13521da177e4SLinus Torvalds */ 13531da177e4SLinus Torvalds static inline void 13541da177e4SLinus Torvalds do_xprt_reserve(struct rpc_task *task) 13551da177e4SLinus Torvalds { 13561da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 13571da177e4SLinus Torvalds 13581da177e4SLinus Torvalds task->tk_status = 0; 13591da177e4SLinus Torvalds if (task->tk_rqstp) 13601da177e4SLinus Torvalds return; 13611da177e4SLinus Torvalds if (!list_empty(&xprt->free)) { 13621da177e4SLinus Torvalds struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 13631da177e4SLinus Torvalds list_del_init(&req->rq_list); 13641da177e4SLinus Torvalds task->tk_rqstp = req; 13651da177e4SLinus Torvalds xprt_request_init(task, xprt); 13661da177e4SLinus Torvalds return; 13671da177e4SLinus Torvalds } 13681da177e4SLinus Torvalds dprintk("RPC: waiting for request slot\n"); 13691da177e4SLinus Torvalds task->tk_status = -EAGAIN; 13701da177e4SLinus Torvalds task->tk_timeout = 0; 13711da177e4SLinus Torvalds rpc_sleep_on(&xprt->backlog, task, NULL, NULL); 13721da177e4SLinus Torvalds } 13731da177e4SLinus Torvalds 13741da177e4SLinus Torvalds void 13751da177e4SLinus Torvalds xprt_reserve(struct rpc_task *task) 13761da177e4SLinus Torvalds { 13771da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 13781da177e4SLinus Torvalds 13791da177e4SLinus Torvalds task->tk_status = -EIO; 13801da177e4SLinus Torvalds if (!xprt->shutdown) { 13811da177e4SLinus Torvalds spin_lock(&xprt->xprt_lock); 13821da177e4SLinus Torvalds do_xprt_reserve(task); 13831da177e4SLinus Torvalds spin_unlock(&xprt->xprt_lock); 13841da177e4SLinus Torvalds } 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds /* 13881da177e4SLinus Torvalds * Allocate a 'unique' XID 13891da177e4SLinus Torvalds */ 13901da177e4SLinus Torvalds static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) 13911da177e4SLinus Torvalds { 13921da177e4SLinus Torvalds return xprt->xid++; 13931da177e4SLinus Torvalds } 13941da177e4SLinus Torvalds 13951da177e4SLinus Torvalds static inline void xprt_init_xid(struct rpc_xprt *xprt) 13961da177e4SLinus Torvalds { 13971da177e4SLinus Torvalds get_random_bytes(&xprt->xid, sizeof(xprt->xid)); 13981da177e4SLinus Torvalds } 13991da177e4SLinus Torvalds 14001da177e4SLinus Torvalds /* 14011da177e4SLinus Torvalds * Initialize RPC request 14021da177e4SLinus Torvalds */ 14031da177e4SLinus Torvalds static void 14041da177e4SLinus Torvalds xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) 14051da177e4SLinus Torvalds { 14061da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 14071da177e4SLinus Torvalds 14081da177e4SLinus Torvalds req->rq_timeout = xprt->timeout.to_initval; 14091da177e4SLinus Torvalds req->rq_task = task; 14101da177e4SLinus Torvalds req->rq_xprt = xprt; 14111da177e4SLinus Torvalds req->rq_xid = xprt_alloc_xid(xprt); 14121da177e4SLinus Torvalds dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 14131da177e4SLinus Torvalds req, ntohl(req->rq_xid)); 14141da177e4SLinus Torvalds } 14151da177e4SLinus Torvalds 14161da177e4SLinus Torvalds /* 14171da177e4SLinus Torvalds * Release an RPC call slot 14181da177e4SLinus Torvalds */ 14191da177e4SLinus Torvalds void 14201da177e4SLinus Torvalds xprt_release(struct rpc_task *task) 14211da177e4SLinus Torvalds { 14221da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 14231da177e4SLinus Torvalds struct rpc_rqst *req; 14241da177e4SLinus Torvalds 14251da177e4SLinus Torvalds if (!(req = task->tk_rqstp)) 14261da177e4SLinus Torvalds return; 14271da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 14281da177e4SLinus Torvalds __xprt_release_write(xprt, task); 14291da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 14301da177e4SLinus Torvalds if (!list_empty(&req->rq_list)) 14311da177e4SLinus Torvalds list_del(&req->rq_list); 14321da177e4SLinus Torvalds xprt->last_used = jiffies; 14331da177e4SLinus Torvalds if (list_empty(&xprt->recv) && !xprt->shutdown) 14341da177e4SLinus Torvalds mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); 14351da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 14361da177e4SLinus Torvalds task->tk_rqstp = NULL; 14371da177e4SLinus Torvalds memset(req, 0, sizeof(*req)); /* mark unused */ 14381da177e4SLinus Torvalds 14391da177e4SLinus Torvalds dprintk("RPC: %4d release request %p\n", task->tk_pid, req); 14401da177e4SLinus Torvalds 14411da177e4SLinus Torvalds spin_lock(&xprt->xprt_lock); 14421da177e4SLinus Torvalds list_add(&req->rq_list, &xprt->free); 14431da177e4SLinus Torvalds xprt_clear_backlog(xprt); 14441da177e4SLinus Torvalds spin_unlock(&xprt->xprt_lock); 14451da177e4SLinus Torvalds } 14461da177e4SLinus Torvalds 14471da177e4SLinus Torvalds /* 14481da177e4SLinus Torvalds * Set default timeout parameters 14491da177e4SLinus Torvalds */ 14501da177e4SLinus Torvalds static void 14511da177e4SLinus Torvalds xprt_default_timeout(struct rpc_timeout *to, int proto) 14521da177e4SLinus Torvalds { 14531da177e4SLinus Torvalds if (proto == IPPROTO_UDP) 14541da177e4SLinus Torvalds xprt_set_timeout(to, 5, 5 * HZ); 14551da177e4SLinus Torvalds else 1456eab5c084SChuck Lever xprt_set_timeout(to, 2, 60 * HZ); 14571da177e4SLinus Torvalds } 14581da177e4SLinus Torvalds 14591da177e4SLinus Torvalds /* 14601da177e4SLinus Torvalds * Set constant timeout 14611da177e4SLinus Torvalds */ 14621da177e4SLinus Torvalds void 14631da177e4SLinus Torvalds xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) 14641da177e4SLinus Torvalds { 14651da177e4SLinus Torvalds to->to_initval = 14661da177e4SLinus Torvalds to->to_increment = incr; 1467eab5c084SChuck Lever to->to_maxval = to->to_initval + (incr * retr); 14681da177e4SLinus Torvalds to->to_retries = retr; 14691da177e4SLinus Torvalds to->to_exponential = 0; 14701da177e4SLinus Torvalds } 14711da177e4SLinus Torvalds 14721da177e4SLinus Torvalds unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 14731da177e4SLinus Torvalds unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; 14741da177e4SLinus Torvalds 14751da177e4SLinus Torvalds /* 14761da177e4SLinus Torvalds * Initialize an RPC client 14771da177e4SLinus Torvalds */ 14781da177e4SLinus Torvalds static struct rpc_xprt * 14791da177e4SLinus Torvalds xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) 14801da177e4SLinus Torvalds { 14811da177e4SLinus Torvalds struct rpc_xprt *xprt; 14821da177e4SLinus Torvalds unsigned int entries; 14831da177e4SLinus Torvalds size_t slot_table_size; 14841da177e4SLinus Torvalds struct rpc_rqst *req; 14851da177e4SLinus Torvalds 14861da177e4SLinus Torvalds dprintk("RPC: setting up %s transport...\n", 14871da177e4SLinus Torvalds proto == IPPROTO_UDP? "UDP" : "TCP"); 14881da177e4SLinus Torvalds 14891da177e4SLinus Torvalds entries = (proto == IPPROTO_TCP)? 14901da177e4SLinus Torvalds xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; 14911da177e4SLinus Torvalds 14921da177e4SLinus Torvalds if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) 14931da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 14941da177e4SLinus Torvalds memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ 14951da177e4SLinus Torvalds xprt->max_reqs = entries; 14961da177e4SLinus Torvalds slot_table_size = entries * sizeof(xprt->slot[0]); 14971da177e4SLinus Torvalds xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); 14981da177e4SLinus Torvalds if (xprt->slot == NULL) { 14991da177e4SLinus Torvalds kfree(xprt); 15001da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 15011da177e4SLinus Torvalds } 15021da177e4SLinus Torvalds memset(xprt->slot, 0, slot_table_size); 15031da177e4SLinus Torvalds 15041da177e4SLinus Torvalds xprt->addr = *ap; 15051da177e4SLinus Torvalds xprt->prot = proto; 15061da177e4SLinus Torvalds xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; 15071da177e4SLinus Torvalds if (xprt->stream) { 15081da177e4SLinus Torvalds xprt->cwnd = RPC_MAXCWND(xprt); 15091da177e4SLinus Torvalds xprt->nocong = 1; 15101da177e4SLinus Torvalds xprt->max_payload = (1U << 31) - 1; 15111da177e4SLinus Torvalds } else { 15121da177e4SLinus Torvalds xprt->cwnd = RPC_INITCWND; 15131da177e4SLinus Torvalds xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 15141da177e4SLinus Torvalds } 15151da177e4SLinus Torvalds spin_lock_init(&xprt->sock_lock); 15161da177e4SLinus Torvalds spin_lock_init(&xprt->xprt_lock); 15171da177e4SLinus Torvalds init_waitqueue_head(&xprt->cong_wait); 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds INIT_LIST_HEAD(&xprt->free); 15201da177e4SLinus Torvalds INIT_LIST_HEAD(&xprt->recv); 15211da177e4SLinus Torvalds INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); 15221da177e4SLinus Torvalds INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); 15231da177e4SLinus Torvalds init_timer(&xprt->timer); 15241da177e4SLinus Torvalds xprt->timer.function = xprt_init_autodisconnect; 15251da177e4SLinus Torvalds xprt->timer.data = (unsigned long) xprt; 15261da177e4SLinus Torvalds xprt->last_used = jiffies; 15271da177e4SLinus Torvalds xprt->port = XPRT_MAX_RESVPORT; 15281da177e4SLinus Torvalds 15291da177e4SLinus Torvalds /* Set timeout parameters */ 15301da177e4SLinus Torvalds if (to) { 15311da177e4SLinus Torvalds xprt->timeout = *to; 15321da177e4SLinus Torvalds } else 15331da177e4SLinus Torvalds xprt_default_timeout(&xprt->timeout, xprt->prot); 15341da177e4SLinus Torvalds 15351da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 15361da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->sending, "xprt_sending"); 15371da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->resend, "xprt_resend"); 15381da177e4SLinus Torvalds rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 15391da177e4SLinus Torvalds 15401da177e4SLinus Torvalds /* initialize free list */ 15411da177e4SLinus Torvalds for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) 15421da177e4SLinus Torvalds list_add(&req->rq_list, &xprt->free); 15431da177e4SLinus Torvalds 15441da177e4SLinus Torvalds xprt_init_xid(xprt); 15451da177e4SLinus Torvalds 15461da177e4SLinus Torvalds /* Check whether we want to use a reserved port */ 15471da177e4SLinus Torvalds xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; 15481da177e4SLinus Torvalds 15491da177e4SLinus Torvalds dprintk("RPC: created transport %p with %u slots\n", xprt, 15501da177e4SLinus Torvalds xprt->max_reqs); 15511da177e4SLinus Torvalds 15521da177e4SLinus Torvalds return xprt; 15531da177e4SLinus Torvalds } 15541da177e4SLinus Torvalds 15551da177e4SLinus Torvalds /* 15561da177e4SLinus Torvalds * Bind to a reserved port 15571da177e4SLinus Torvalds */ 15581da177e4SLinus Torvalds static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 15591da177e4SLinus Torvalds { 15601da177e4SLinus Torvalds struct sockaddr_in myaddr = { 15611da177e4SLinus Torvalds .sin_family = AF_INET, 15621da177e4SLinus Torvalds }; 15631da177e4SLinus Torvalds int err, port; 15641da177e4SLinus Torvalds 15651da177e4SLinus Torvalds /* Were we already bound to a given port? Try to reuse it */ 15661da177e4SLinus Torvalds port = xprt->port; 15671da177e4SLinus Torvalds do { 15681da177e4SLinus Torvalds myaddr.sin_port = htons(port); 15691da177e4SLinus Torvalds err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, 15701da177e4SLinus Torvalds sizeof(myaddr)); 15711da177e4SLinus Torvalds if (err == 0) { 15721da177e4SLinus Torvalds xprt->port = port; 15731da177e4SLinus Torvalds return 0; 15741da177e4SLinus Torvalds } 15751da177e4SLinus Torvalds if (--port == 0) 15761da177e4SLinus Torvalds port = XPRT_MAX_RESVPORT; 15771da177e4SLinus Torvalds } while (err == -EADDRINUSE && port != xprt->port); 15781da177e4SLinus Torvalds 15791da177e4SLinus Torvalds printk("RPC: Can't bind to reserved port (%d).\n", -err); 15801da177e4SLinus Torvalds return err; 15811da177e4SLinus Torvalds } 15821da177e4SLinus Torvalds 15831da177e4SLinus Torvalds static void 15841da177e4SLinus Torvalds xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) 15851da177e4SLinus Torvalds { 15861da177e4SLinus Torvalds struct sock *sk = sock->sk; 15871da177e4SLinus Torvalds 15881da177e4SLinus Torvalds if (xprt->inet) 15891da177e4SLinus Torvalds return; 15901da177e4SLinus Torvalds 15911da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 15921da177e4SLinus Torvalds sk->sk_user_data = xprt; 15931da177e4SLinus Torvalds xprt->old_data_ready = sk->sk_data_ready; 15941da177e4SLinus Torvalds xprt->old_state_change = sk->sk_state_change; 15951da177e4SLinus Torvalds xprt->old_write_space = sk->sk_write_space; 15961da177e4SLinus Torvalds if (xprt->prot == IPPROTO_UDP) { 15971da177e4SLinus Torvalds sk->sk_data_ready = udp_data_ready; 15981da177e4SLinus Torvalds sk->sk_no_check = UDP_CSUM_NORCV; 15991da177e4SLinus Torvalds xprt_set_connected(xprt); 16001da177e4SLinus Torvalds } else { 16011da177e4SLinus Torvalds tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ 16021da177e4SLinus Torvalds sk->sk_data_ready = tcp_data_ready; 16031da177e4SLinus Torvalds sk->sk_state_change = tcp_state_change; 16041da177e4SLinus Torvalds xprt_clear_connected(xprt); 16051da177e4SLinus Torvalds } 16061da177e4SLinus Torvalds sk->sk_write_space = xprt_write_space; 16071da177e4SLinus Torvalds 16081da177e4SLinus Torvalds /* Reset to new socket */ 16091da177e4SLinus Torvalds xprt->sock = sock; 16101da177e4SLinus Torvalds xprt->inet = sk; 16111da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 16121da177e4SLinus Torvalds 16131da177e4SLinus Torvalds return; 16141da177e4SLinus Torvalds } 16151da177e4SLinus Torvalds 16161da177e4SLinus Torvalds /* 16171da177e4SLinus Torvalds * Set socket buffer length 16181da177e4SLinus Torvalds */ 16191da177e4SLinus Torvalds void 16201da177e4SLinus Torvalds xprt_sock_setbufsize(struct rpc_xprt *xprt) 16211da177e4SLinus Torvalds { 16221da177e4SLinus Torvalds struct sock *sk = xprt->inet; 16231da177e4SLinus Torvalds 16241da177e4SLinus Torvalds if (xprt->stream) 16251da177e4SLinus Torvalds return; 16261da177e4SLinus Torvalds if (xprt->rcvsize) { 16271da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 16281da177e4SLinus Torvalds sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; 16291da177e4SLinus Torvalds } 16301da177e4SLinus Torvalds if (xprt->sndsize) { 16311da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 16321da177e4SLinus Torvalds sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; 16331da177e4SLinus Torvalds sk->sk_write_space(sk); 16341da177e4SLinus Torvalds } 16351da177e4SLinus Torvalds } 16361da177e4SLinus Torvalds 16371da177e4SLinus Torvalds /* 16381da177e4SLinus Torvalds * Datastream sockets are created here, but xprt_connect will create 16391da177e4SLinus Torvalds * and connect stream sockets. 16401da177e4SLinus Torvalds */ 16411da177e4SLinus Torvalds static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) 16421da177e4SLinus Torvalds { 16431da177e4SLinus Torvalds struct socket *sock; 16441da177e4SLinus Torvalds int type, err; 16451da177e4SLinus Torvalds 16461da177e4SLinus Torvalds dprintk("RPC: xprt_create_socket(%s %d)\n", 16471da177e4SLinus Torvalds (proto == IPPROTO_UDP)? "udp" : "tcp", proto); 16481da177e4SLinus Torvalds 16491da177e4SLinus Torvalds type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 16501da177e4SLinus Torvalds 16511da177e4SLinus Torvalds if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { 16521da177e4SLinus Torvalds printk("RPC: can't create socket (%d).\n", -err); 16531da177e4SLinus Torvalds return NULL; 16541da177e4SLinus Torvalds } 16551da177e4SLinus Torvalds 16561da177e4SLinus Torvalds /* If the caller has the capability, bind to a reserved port */ 16571da177e4SLinus Torvalds if (resvport && xprt_bindresvport(xprt, sock) < 0) { 16581da177e4SLinus Torvalds printk("RPC: can't bind to reserved port.\n"); 16591da177e4SLinus Torvalds goto failed; 16601da177e4SLinus Torvalds } 16611da177e4SLinus Torvalds 16621da177e4SLinus Torvalds return sock; 16631da177e4SLinus Torvalds 16641da177e4SLinus Torvalds failed: 16651da177e4SLinus Torvalds sock_release(sock); 16661da177e4SLinus Torvalds return NULL; 16671da177e4SLinus Torvalds } 16681da177e4SLinus Torvalds 16691da177e4SLinus Torvalds /* 16701da177e4SLinus Torvalds * Create an RPC client transport given the protocol and peer address. 16711da177e4SLinus Torvalds */ 16721da177e4SLinus Torvalds struct rpc_xprt * 16731da177e4SLinus Torvalds xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) 16741da177e4SLinus Torvalds { 16751da177e4SLinus Torvalds struct rpc_xprt *xprt; 16761da177e4SLinus Torvalds 16771da177e4SLinus Torvalds xprt = xprt_setup(proto, sap, to); 16781da177e4SLinus Torvalds if (IS_ERR(xprt)) 16791da177e4SLinus Torvalds dprintk("RPC: xprt_create_proto failed\n"); 16801da177e4SLinus Torvalds else 16811da177e4SLinus Torvalds dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); 16821da177e4SLinus Torvalds return xprt; 16831da177e4SLinus Torvalds } 16841da177e4SLinus Torvalds 16851da177e4SLinus Torvalds /* 16861da177e4SLinus Torvalds * Prepare for transport shutdown. 16871da177e4SLinus Torvalds */ 16881da177e4SLinus Torvalds static void 16891da177e4SLinus Torvalds xprt_shutdown(struct rpc_xprt *xprt) 16901da177e4SLinus Torvalds { 16911da177e4SLinus Torvalds xprt->shutdown = 1; 16921da177e4SLinus Torvalds rpc_wake_up(&xprt->sending); 16931da177e4SLinus Torvalds rpc_wake_up(&xprt->resend); 16941da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 16951da177e4SLinus Torvalds rpc_wake_up(&xprt->backlog); 16961da177e4SLinus Torvalds wake_up(&xprt->cong_wait); 16971da177e4SLinus Torvalds del_timer_sync(&xprt->timer); 1698ae388462SChuck Lever 1699ae388462SChuck Lever /* synchronously wait for connect worker to finish */ 1700ae388462SChuck Lever cancel_delayed_work(&xprt->sock_connect); 1701ae388462SChuck Lever flush_scheduled_work(); 17021da177e4SLinus Torvalds } 17031da177e4SLinus Torvalds 17041da177e4SLinus Torvalds /* 17051da177e4SLinus Torvalds * Clear the xprt backlog queue 17061da177e4SLinus Torvalds */ 17071da177e4SLinus Torvalds static int 17081da177e4SLinus Torvalds xprt_clear_backlog(struct rpc_xprt *xprt) { 17091da177e4SLinus Torvalds rpc_wake_up_next(&xprt->backlog); 17101da177e4SLinus Torvalds wake_up(&xprt->cong_wait); 17111da177e4SLinus Torvalds return 1; 17121da177e4SLinus Torvalds } 17131da177e4SLinus Torvalds 17141da177e4SLinus Torvalds /* 17151da177e4SLinus Torvalds * Destroy an RPC transport, killing off all requests. 17161da177e4SLinus Torvalds */ 17171da177e4SLinus Torvalds int 17181da177e4SLinus Torvalds xprt_destroy(struct rpc_xprt *xprt) 17191da177e4SLinus Torvalds { 17201da177e4SLinus Torvalds dprintk("RPC: destroying transport %p\n", xprt); 17211da177e4SLinus Torvalds xprt_shutdown(xprt); 17221da177e4SLinus Torvalds xprt_disconnect(xprt); 17231da177e4SLinus Torvalds xprt_close(xprt); 17241da177e4SLinus Torvalds kfree(xprt->slot); 17251da177e4SLinus Torvalds kfree(xprt); 17261da177e4SLinus Torvalds 17271da177e4SLinus Torvalds return 0; 17281da177e4SLinus Torvalds } 1729