11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/net/sunrpc/xprt.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This is a generic RPC call interface supporting congestion avoidance, 51da177e4SLinus Torvalds * and asynchronous calls. 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * The interface works like this: 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * - When a process places a call, it allocates a request slot if 101da177e4SLinus Torvalds * one is available. Otherwise, it sleeps on the backlog queue 111da177e4SLinus Torvalds * (xprt_reserve). 121da177e4SLinus Torvalds * - Next, the caller puts together the RPC message, stuffs it into 131da177e4SLinus Torvalds * the request struct, and calls xprt_call(). 141da177e4SLinus Torvalds * - xprt_call transmits the message and installs the caller on the 151da177e4SLinus Torvalds * socket's wait list. At the same time, it installs a timer that 161da177e4SLinus Torvalds * is run after the packet's timeout has expired. 171da177e4SLinus Torvalds * - When a packet arrives, the data_ready handler walks the list of 181da177e4SLinus Torvalds * pending requests for that socket. If a matching XID is found, the 191da177e4SLinus Torvalds * caller is woken up, and the timer removed. 201da177e4SLinus Torvalds * - When no reply arrives within the timeout interval, the timer is 211da177e4SLinus Torvalds * fired by the kernel and runs xprt_timer(). It either adjusts the 221da177e4SLinus Torvalds * timeout values (minor timeout) or wakes up the caller with a status 231da177e4SLinus Torvalds * of -ETIMEDOUT. 241da177e4SLinus Torvalds * - When the caller receives a notification from RPC that a reply arrived, 251da177e4SLinus Torvalds * it should release the RPC slot, and process the reply. 261da177e4SLinus Torvalds * If the call timed out, it may choose to retry the operation by 271da177e4SLinus Torvalds * adjusting the initial timeout value, and simply calling rpc_call 281da177e4SLinus Torvalds * again. 
291da177e4SLinus Torvalds * 301da177e4SLinus Torvalds * Support for async RPC is done through a set of RPC-specific scheduling 311da177e4SLinus Torvalds * primitives that `transparently' work for processes as well as async 321da177e4SLinus Torvalds * tasks that rely on callbacks. 331da177e4SLinus Torvalds * 341da177e4SLinus Torvalds * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> 351da177e4SLinus Torvalds * 361da177e4SLinus Torvalds * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> 371da177e4SLinus Torvalds * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> 381da177e4SLinus Torvalds * TCP NFS related read + write fixes 391da177e4SLinus Torvalds * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> 401da177e4SLinus Torvalds * 411da177e4SLinus Torvalds * Rewrite of large parts of the code in order to stabilize TCP stuff. 421da177e4SLinus Torvalds * Fix behaviour when socket buffer is full. 431da177e4SLinus Torvalds * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> 441da177e4SLinus Torvalds */ 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds #include <linux/types.h> 471da177e4SLinus Torvalds #include <linux/slab.h> 481da177e4SLinus Torvalds #include <linux/capability.h> 491da177e4SLinus Torvalds #include <linux/sched.h> 501da177e4SLinus Torvalds #include <linux/errno.h> 511da177e4SLinus Torvalds #include <linux/socket.h> 521da177e4SLinus Torvalds #include <linux/in.h> 531da177e4SLinus Torvalds #include <linux/net.h> 541da177e4SLinus Torvalds #include <linux/mm.h> 551da177e4SLinus Torvalds #include <linux/udp.h> 561da177e4SLinus Torvalds #include <linux/tcp.h> 571da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h> 581da177e4SLinus Torvalds #include <linux/file.h> 591da177e4SLinus Torvalds #include <linux/workqueue.h> 601da177e4SLinus Torvalds #include <linux/random.h> 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds #include <net/sock.h> 631da177e4SLinus Torvalds #include <net/checksum.h> 
641da177e4SLinus Torvalds #include <net/udp.h> 651da177e4SLinus Torvalds #include <net/tcp.h> 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds /* 681da177e4SLinus Torvalds * Local variables 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds #ifdef RPC_DEBUG 721da177e4SLinus Torvalds # undef RPC_DEBUG_DATA 731da177e4SLinus Torvalds # define RPCDBG_FACILITY RPCDBG_XPRT 741da177e4SLinus Torvalds #endif 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds #define XPRT_MAX_BACKOFF (8) 771da177e4SLinus Torvalds #define XPRT_IDLE_TIMEOUT (5*60*HZ) 781da177e4SLinus Torvalds #define XPRT_MAX_RESVPORT (800) 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds /* 811da177e4SLinus Torvalds * Local functions 821da177e4SLinus Torvalds */ 831da177e4SLinus Torvalds static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 841da177e4SLinus Torvalds static inline void do_xprt_reserve(struct rpc_task *); 851da177e4SLinus Torvalds static void xprt_disconnect(struct rpc_xprt *); 861da177e4SLinus Torvalds static void xprt_connect_status(struct rpc_task *task); 871da177e4SLinus Torvalds static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, 881da177e4SLinus Torvalds struct rpc_timeout *to); 891da177e4SLinus Torvalds static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); 901da177e4SLinus Torvalds static void xprt_bind_socket(struct rpc_xprt *, struct socket *); 911da177e4SLinus Torvalds static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds static int xprt_clear_backlog(struct rpc_xprt *xprt); 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds #ifdef RPC_DEBUG_DATA 961da177e4SLinus Torvalds /* 971da177e4SLinus Torvalds * Print the buffer contents (first 128 bytes only--just enough for 981da177e4SLinus Torvalds * diropres return). 
991da177e4SLinus Torvalds */ 1001da177e4SLinus Torvalds static void 1011da177e4SLinus Torvalds xprt_pktdump(char *msg, u32 *packet, unsigned int count) 1021da177e4SLinus Torvalds { 1031da177e4SLinus Torvalds u8 *buf = (u8 *) packet; 1041da177e4SLinus Torvalds int j; 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds dprintk("RPC: %s\n", msg); 1071da177e4SLinus Torvalds for (j = 0; j < count && j < 128; j += 4) { 1081da177e4SLinus Torvalds if (!(j & 31)) { 1091da177e4SLinus Torvalds if (j) 1101da177e4SLinus Torvalds dprintk("\n"); 1111da177e4SLinus Torvalds dprintk("0x%04x ", j); 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds dprintk("%02x%02x%02x%02x ", 1141da177e4SLinus Torvalds buf[j], buf[j+1], buf[j+2], buf[j+3]); 1151da177e4SLinus Torvalds } 1161da177e4SLinus Torvalds dprintk("\n"); 1171da177e4SLinus Torvalds } 1181da177e4SLinus Torvalds #else 1191da177e4SLinus Torvalds static inline void 1201da177e4SLinus Torvalds xprt_pktdump(char *msg, u32 *packet, unsigned int count) 1211da177e4SLinus Torvalds { 1221da177e4SLinus Torvalds /* NOP */ 1231da177e4SLinus Torvalds } 1241da177e4SLinus Torvalds #endif 1251da177e4SLinus Torvalds 1261da177e4SLinus Torvalds /* 1271da177e4SLinus Torvalds * Look up RPC transport given an INET socket 1281da177e4SLinus Torvalds */ 1291da177e4SLinus Torvalds static inline struct rpc_xprt * 1301da177e4SLinus Torvalds xprt_from_sock(struct sock *sk) 1311da177e4SLinus Torvalds { 1321da177e4SLinus Torvalds return (struct rpc_xprt *) sk->sk_user_data; 1331da177e4SLinus Torvalds } 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds /* 1361da177e4SLinus Torvalds * Serialize write access to sockets, in order to prevent different 1371da177e4SLinus Torvalds * requests from interfering with each other. 1381da177e4SLinus Torvalds * Also prevents TCP socket connects from colliding with writes. 
1391da177e4SLinus Torvalds */ 1401da177e4SLinus Torvalds static int 1411da177e4SLinus Torvalds __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 1421da177e4SLinus Torvalds { 1431da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { 1461da177e4SLinus Torvalds if (task == xprt->snd_task) 1471da177e4SLinus Torvalds return 1; 1481da177e4SLinus Torvalds if (task == NULL) 1491da177e4SLinus Torvalds return 0; 1501da177e4SLinus Torvalds goto out_sleep; 1511da177e4SLinus Torvalds } 1521da177e4SLinus Torvalds if (xprt->nocong || __xprt_get_cong(xprt, task)) { 1531da177e4SLinus Torvalds xprt->snd_task = task; 1541da177e4SLinus Torvalds if (req) { 1551da177e4SLinus Torvalds req->rq_bytes_sent = 0; 1561da177e4SLinus Torvalds req->rq_ntrans++; 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds return 1; 1591da177e4SLinus Torvalds } 1601da177e4SLinus Torvalds smp_mb__before_clear_bit(); 1611da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 1621da177e4SLinus Torvalds smp_mb__after_clear_bit(); 1631da177e4SLinus Torvalds out_sleep: 1641da177e4SLinus Torvalds dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); 1651da177e4SLinus Torvalds task->tk_timeout = 0; 1661da177e4SLinus Torvalds task->tk_status = -EAGAIN; 1671da177e4SLinus Torvalds if (req && req->rq_ntrans) 1681da177e4SLinus Torvalds rpc_sleep_on(&xprt->resend, task, NULL, NULL); 1691da177e4SLinus Torvalds else 1701da177e4SLinus Torvalds rpc_sleep_on(&xprt->sending, task, NULL, NULL); 1711da177e4SLinus Torvalds return 0; 1721da177e4SLinus Torvalds } 1731da177e4SLinus Torvalds 1741da177e4SLinus Torvalds static inline int 1751da177e4SLinus Torvalds xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 1761da177e4SLinus Torvalds { 1771da177e4SLinus Torvalds int retval; 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 
1801da177e4SLinus Torvalds retval = __xprt_lock_write(xprt, task); 1811da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 1821da177e4SLinus Torvalds return retval; 1831da177e4SLinus Torvalds } 1841da177e4SLinus Torvalds 1851da177e4SLinus Torvalds 1861da177e4SLinus Torvalds static void 1871da177e4SLinus Torvalds __xprt_lock_write_next(struct rpc_xprt *xprt) 1881da177e4SLinus Torvalds { 1891da177e4SLinus Torvalds struct rpc_task *task; 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 1921da177e4SLinus Torvalds return; 1931da177e4SLinus Torvalds if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) 1941da177e4SLinus Torvalds goto out_unlock; 1951da177e4SLinus Torvalds task = rpc_wake_up_next(&xprt->resend); 1961da177e4SLinus Torvalds if (!task) { 1971da177e4SLinus Torvalds task = rpc_wake_up_next(&xprt->sending); 1981da177e4SLinus Torvalds if (!task) 1991da177e4SLinus Torvalds goto out_unlock; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds if (xprt->nocong || __xprt_get_cong(xprt, task)) { 2021da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 2031da177e4SLinus Torvalds xprt->snd_task = task; 2041da177e4SLinus Torvalds if (req) { 2051da177e4SLinus Torvalds req->rq_bytes_sent = 0; 2061da177e4SLinus Torvalds req->rq_ntrans++; 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds return; 2091da177e4SLinus Torvalds } 2101da177e4SLinus Torvalds out_unlock: 2111da177e4SLinus Torvalds smp_mb__before_clear_bit(); 2121da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 2131da177e4SLinus Torvalds smp_mb__after_clear_bit(); 2141da177e4SLinus Torvalds } 2151da177e4SLinus Torvalds 2161da177e4SLinus Torvalds /* 2171da177e4SLinus Torvalds * Releases the socket for use by other requests. 
2181da177e4SLinus Torvalds */ 2191da177e4SLinus Torvalds static void 2201da177e4SLinus Torvalds __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 2211da177e4SLinus Torvalds { 2221da177e4SLinus Torvalds if (xprt->snd_task == task) { 2231da177e4SLinus Torvalds xprt->snd_task = NULL; 2241da177e4SLinus Torvalds smp_mb__before_clear_bit(); 2251da177e4SLinus Torvalds clear_bit(XPRT_LOCKED, &xprt->sockstate); 2261da177e4SLinus Torvalds smp_mb__after_clear_bit(); 2271da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 2281da177e4SLinus Torvalds } 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds static inline void 2321da177e4SLinus Torvalds xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 2331da177e4SLinus Torvalds { 2341da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 2351da177e4SLinus Torvalds __xprt_release_write(xprt, task); 2361da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds /* 2401da177e4SLinus Torvalds * Write data to socket. 
2411da177e4SLinus Torvalds */ 2421da177e4SLinus Torvalds static inline int 2431da177e4SLinus Torvalds xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) 2441da177e4SLinus Torvalds { 2451da177e4SLinus Torvalds struct socket *sock = xprt->sock; 2461da177e4SLinus Torvalds struct xdr_buf *xdr = &req->rq_snd_buf; 2471da177e4SLinus Torvalds struct sockaddr *addr = NULL; 2481da177e4SLinus Torvalds int addrlen = 0; 2491da177e4SLinus Torvalds unsigned int skip; 2501da177e4SLinus Torvalds int result; 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds if (!sock) 2531da177e4SLinus Torvalds return -ENOTCONN; 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds xprt_pktdump("packet data:", 2561da177e4SLinus Torvalds req->rq_svec->iov_base, 2571da177e4SLinus Torvalds req->rq_svec->iov_len); 2581da177e4SLinus Torvalds 2591da177e4SLinus Torvalds /* For UDP, we need to provide an address */ 2601da177e4SLinus Torvalds if (!xprt->stream) { 2611da177e4SLinus Torvalds addr = (struct sockaddr *) &xprt->addr; 2621da177e4SLinus Torvalds addrlen = sizeof(xprt->addr); 2631da177e4SLinus Torvalds } 2641da177e4SLinus Torvalds /* Dont repeat bytes */ 2651da177e4SLinus Torvalds skip = req->rq_bytes_sent; 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 2681da177e4SLinus Torvalds result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds if (result >= 0) 2731da177e4SLinus Torvalds return result; 2741da177e4SLinus Torvalds 2751da177e4SLinus Torvalds switch (result) { 2761da177e4SLinus Torvalds case -ECONNREFUSED: 2771da177e4SLinus Torvalds /* When the server has died, an ICMP port unreachable message 2781da177e4SLinus Torvalds * prompts ECONNREFUSED. 
2791da177e4SLinus Torvalds */ 2801da177e4SLinus Torvalds case -EAGAIN: 2811da177e4SLinus Torvalds break; 2821da177e4SLinus Torvalds case -ECONNRESET: 2831da177e4SLinus Torvalds case -ENOTCONN: 2841da177e4SLinus Torvalds case -EPIPE: 2851da177e4SLinus Torvalds /* connection broken */ 2861da177e4SLinus Torvalds if (xprt->stream) 2871da177e4SLinus Torvalds result = -ENOTCONN; 2881da177e4SLinus Torvalds break; 2891da177e4SLinus Torvalds default: 2901da177e4SLinus Torvalds printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); 2911da177e4SLinus Torvalds } 2921da177e4SLinus Torvalds return result; 2931da177e4SLinus Torvalds } 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds /* 2961da177e4SLinus Torvalds * Van Jacobson congestion avoidance. Check if the congestion window 2971da177e4SLinus Torvalds * overflowed. Put the task to sleep if this is the case. 2981da177e4SLinus Torvalds */ 2991da177e4SLinus Torvalds static int 3001da177e4SLinus Torvalds __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) 3011da177e4SLinus Torvalds { 3021da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds if (req->rq_cong) 3051da177e4SLinus Torvalds return 1; 3061da177e4SLinus Torvalds dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n", 3071da177e4SLinus Torvalds task->tk_pid, xprt->cong, xprt->cwnd); 3081da177e4SLinus Torvalds if (RPCXPRT_CONGESTED(xprt)) 3091da177e4SLinus Torvalds return 0; 3101da177e4SLinus Torvalds req->rq_cong = 1; 3111da177e4SLinus Torvalds xprt->cong += RPC_CWNDSCALE; 3121da177e4SLinus Torvalds return 1; 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds /* 3161da177e4SLinus Torvalds * Adjust the congestion window, and wake up the next task 3171da177e4SLinus Torvalds * that has been sleeping due to congestion 3181da177e4SLinus Torvalds */ 3191da177e4SLinus Torvalds static void 3201da177e4SLinus Torvalds __xprt_put_cong(struct rpc_xprt *xprt, 
struct rpc_rqst *req) 3211da177e4SLinus Torvalds { 3221da177e4SLinus Torvalds if (!req->rq_cong) 3231da177e4SLinus Torvalds return; 3241da177e4SLinus Torvalds req->rq_cong = 0; 3251da177e4SLinus Torvalds xprt->cong -= RPC_CWNDSCALE; 3261da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds /* 3301da177e4SLinus Torvalds * Adjust RPC congestion window 3311da177e4SLinus Torvalds * We use a time-smoothed congestion estimator to avoid heavy oscillation. 3321da177e4SLinus Torvalds */ 3331da177e4SLinus Torvalds static void 3341da177e4SLinus Torvalds xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) 3351da177e4SLinus Torvalds { 3361da177e4SLinus Torvalds unsigned long cwnd; 3371da177e4SLinus Torvalds 3381da177e4SLinus Torvalds cwnd = xprt->cwnd; 3391da177e4SLinus Torvalds if (result >= 0 && cwnd <= xprt->cong) { 3401da177e4SLinus Torvalds /* The (cwnd >> 1) term makes sure 3411da177e4SLinus Torvalds * the result gets rounded properly. 
*/ 3421da177e4SLinus Torvalds cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; 3431da177e4SLinus Torvalds if (cwnd > RPC_MAXCWND(xprt)) 3441da177e4SLinus Torvalds cwnd = RPC_MAXCWND(xprt); 3451da177e4SLinus Torvalds __xprt_lock_write_next(xprt); 3461da177e4SLinus Torvalds } else if (result == -ETIMEDOUT) { 3471da177e4SLinus Torvalds cwnd >>= 1; 3481da177e4SLinus Torvalds if (cwnd < RPC_CWNDSCALE) 3491da177e4SLinus Torvalds cwnd = RPC_CWNDSCALE; 3501da177e4SLinus Torvalds } 3511da177e4SLinus Torvalds dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", 3521da177e4SLinus Torvalds xprt->cong, xprt->cwnd, cwnd); 3531da177e4SLinus Torvalds xprt->cwnd = cwnd; 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds 3561da177e4SLinus Torvalds /* 3571da177e4SLinus Torvalds * Reset the major timeout value 3581da177e4SLinus Torvalds */ 3591da177e4SLinus Torvalds static void xprt_reset_majortimeo(struct rpc_rqst *req) 3601da177e4SLinus Torvalds { 3611da177e4SLinus Torvalds struct rpc_timeout *to = &req->rq_xprt->timeout; 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds req->rq_majortimeo = req->rq_timeout; 3641da177e4SLinus Torvalds if (to->to_exponential) 3651da177e4SLinus Torvalds req->rq_majortimeo <<= to->to_retries; 3661da177e4SLinus Torvalds else 3671da177e4SLinus Torvalds req->rq_majortimeo += to->to_increment * to->to_retries; 3681da177e4SLinus Torvalds if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) 3691da177e4SLinus Torvalds req->rq_majortimeo = to->to_maxval; 3701da177e4SLinus Torvalds req->rq_majortimeo += jiffies; 3711da177e4SLinus Torvalds } 3721da177e4SLinus Torvalds 3731da177e4SLinus Torvalds /* 3741da177e4SLinus Torvalds * Adjust timeout values etc for next retransmit 3751da177e4SLinus Torvalds */ 3761da177e4SLinus Torvalds int xprt_adjust_timeout(struct rpc_rqst *req) 3771da177e4SLinus Torvalds { 3781da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 3791da177e4SLinus Torvalds struct rpc_timeout *to = 
&xprt->timeout; 3801da177e4SLinus Torvalds int status = 0; 3811da177e4SLinus Torvalds 3821da177e4SLinus Torvalds if (time_before(jiffies, req->rq_majortimeo)) { 3831da177e4SLinus Torvalds if (to->to_exponential) 3841da177e4SLinus Torvalds req->rq_timeout <<= 1; 3851da177e4SLinus Torvalds else 3861da177e4SLinus Torvalds req->rq_timeout += to->to_increment; 3871da177e4SLinus Torvalds if (to->to_maxval && req->rq_timeout >= to->to_maxval) 3881da177e4SLinus Torvalds req->rq_timeout = to->to_maxval; 3891da177e4SLinus Torvalds req->rq_retries++; 3901da177e4SLinus Torvalds pprintk("RPC: %lu retrans\n", jiffies); 3911da177e4SLinus Torvalds } else { 3921da177e4SLinus Torvalds req->rq_timeout = to->to_initval; 3931da177e4SLinus Torvalds req->rq_retries = 0; 3941da177e4SLinus Torvalds xprt_reset_majortimeo(req); 3951da177e4SLinus Torvalds /* Reset the RTT counters == "slow start" */ 3961da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 3971da177e4SLinus Torvalds rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 3981da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 3991da177e4SLinus Torvalds pprintk("RPC: %lu timeout\n", jiffies); 4001da177e4SLinus Torvalds status = -ETIMEDOUT; 4011da177e4SLinus Torvalds } 4021da177e4SLinus Torvalds 4031da177e4SLinus Torvalds if (req->rq_timeout == 0) { 4041da177e4SLinus Torvalds printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); 4051da177e4SLinus Torvalds req->rq_timeout = 5 * HZ; 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds return status; 4081da177e4SLinus Torvalds } 4091da177e4SLinus Torvalds 4101da177e4SLinus Torvalds /* 4111da177e4SLinus Torvalds * Close down a transport socket 4121da177e4SLinus Torvalds */ 4131da177e4SLinus Torvalds static void 4141da177e4SLinus Torvalds xprt_close(struct rpc_xprt *xprt) 4151da177e4SLinus Torvalds { 4161da177e4SLinus Torvalds struct socket *sock = xprt->sock; 4171da177e4SLinus Torvalds struct sock *sk = xprt->inet; 4181da177e4SLinus Torvalds 
4191da177e4SLinus Torvalds if (!sk) 4201da177e4SLinus Torvalds return; 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 4231da177e4SLinus Torvalds xprt->inet = NULL; 4241da177e4SLinus Torvalds xprt->sock = NULL; 4251da177e4SLinus Torvalds 4261da177e4SLinus Torvalds sk->sk_user_data = NULL; 4271da177e4SLinus Torvalds sk->sk_data_ready = xprt->old_data_ready; 4281da177e4SLinus Torvalds sk->sk_state_change = xprt->old_state_change; 4291da177e4SLinus Torvalds sk->sk_write_space = xprt->old_write_space; 4301da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds sk->sk_no_check = 0; 4331da177e4SLinus Torvalds 4341da177e4SLinus Torvalds sock_release(sock); 4351da177e4SLinus Torvalds } 4361da177e4SLinus Torvalds 4371da177e4SLinus Torvalds static void 4381da177e4SLinus Torvalds xprt_socket_autoclose(void *args) 4391da177e4SLinus Torvalds { 4401da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)args; 4411da177e4SLinus Torvalds 4421da177e4SLinus Torvalds xprt_disconnect(xprt); 4431da177e4SLinus Torvalds xprt_close(xprt); 4441da177e4SLinus Torvalds xprt_release_write(xprt, NULL); 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4471da177e4SLinus Torvalds /* 4481da177e4SLinus Torvalds * Mark a transport as disconnected 4491da177e4SLinus Torvalds */ 4501da177e4SLinus Torvalds static void 4511da177e4SLinus Torvalds xprt_disconnect(struct rpc_xprt *xprt) 4521da177e4SLinus Torvalds { 4531da177e4SLinus Torvalds dprintk("RPC: disconnected transport %p\n", xprt); 4541da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 4551da177e4SLinus Torvalds xprt_clear_connected(xprt); 4561da177e4SLinus Torvalds rpc_wake_up_status(&xprt->pending, -ENOTCONN); 4571da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 4581da177e4SLinus Torvalds } 4591da177e4SLinus Torvalds 4601da177e4SLinus Torvalds /* 4611da177e4SLinus Torvalds * Used to allow disconnection when we've 
been idle 4621da177e4SLinus Torvalds */ 4631da177e4SLinus Torvalds static void 4641da177e4SLinus Torvalds xprt_init_autodisconnect(unsigned long data) 4651da177e4SLinus Torvalds { 4661da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)data; 4671da177e4SLinus Torvalds 4681da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 4691da177e4SLinus Torvalds if (!list_empty(&xprt->recv) || xprt->shutdown) 4701da177e4SLinus Torvalds goto out_abort; 4711da177e4SLinus Torvalds if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) 4721da177e4SLinus Torvalds goto out_abort; 4731da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 4741da177e4SLinus Torvalds /* Let keventd close the socket */ 4751da177e4SLinus Torvalds if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) 4761da177e4SLinus Torvalds xprt_release_write(xprt, NULL); 4771da177e4SLinus Torvalds else 4781da177e4SLinus Torvalds schedule_work(&xprt->task_cleanup); 4791da177e4SLinus Torvalds return; 4801da177e4SLinus Torvalds out_abort: 4811da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 4821da177e4SLinus Torvalds } 4831da177e4SLinus Torvalds 4841da177e4SLinus Torvalds static void xprt_socket_connect(void *args) 4851da177e4SLinus Torvalds { 4861da177e4SLinus Torvalds struct rpc_xprt *xprt = (struct rpc_xprt *)args; 4871da177e4SLinus Torvalds struct socket *sock = xprt->sock; 4881da177e4SLinus Torvalds int status = -EIO; 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds if (xprt->shutdown || xprt->addr.sin_port == 0) 4911da177e4SLinus Torvalds goto out; 4921da177e4SLinus Torvalds 4931da177e4SLinus Torvalds /* 4941da177e4SLinus Torvalds * Start by resetting any existing state 4951da177e4SLinus Torvalds */ 4961da177e4SLinus Torvalds xprt_close(xprt); 4971da177e4SLinus Torvalds sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); 4981da177e4SLinus Torvalds if (sock == NULL) { 4991da177e4SLinus Torvalds /* couldn't create socket or bind to reserved port; 5001da177e4SLinus Torvalds * this is 
likely a permanent error, so cause an abort */ 5011da177e4SLinus Torvalds goto out; 5021da177e4SLinus Torvalds } 5031da177e4SLinus Torvalds xprt_bind_socket(xprt, sock); 5041da177e4SLinus Torvalds xprt_sock_setbufsize(xprt); 5051da177e4SLinus Torvalds 5061da177e4SLinus Torvalds status = 0; 5071da177e4SLinus Torvalds if (!xprt->stream) 5081da177e4SLinus Torvalds goto out; 5091da177e4SLinus Torvalds 5101da177e4SLinus Torvalds /* 5111da177e4SLinus Torvalds * Tell the socket layer to start connecting... 5121da177e4SLinus Torvalds */ 5131da177e4SLinus Torvalds status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, 5141da177e4SLinus Torvalds sizeof(xprt->addr), O_NONBLOCK); 5151da177e4SLinus Torvalds dprintk("RPC: %p connect status %d connected %d sock state %d\n", 5161da177e4SLinus Torvalds xprt, -status, xprt_connected(xprt), sock->sk->sk_state); 5171da177e4SLinus Torvalds if (status < 0) { 5181da177e4SLinus Torvalds switch (status) { 5191da177e4SLinus Torvalds case -EINPROGRESS: 5201da177e4SLinus Torvalds case -EALREADY: 5211da177e4SLinus Torvalds goto out_clear; 5221da177e4SLinus Torvalds } 5231da177e4SLinus Torvalds } 5241da177e4SLinus Torvalds out: 5251da177e4SLinus Torvalds if (status < 0) 5261da177e4SLinus Torvalds rpc_wake_up_status(&xprt->pending, status); 5271da177e4SLinus Torvalds else 5281da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 5291da177e4SLinus Torvalds out_clear: 5301da177e4SLinus Torvalds smp_mb__before_clear_bit(); 5311da177e4SLinus Torvalds clear_bit(XPRT_CONNECTING, &xprt->sockstate); 5321da177e4SLinus Torvalds smp_mb__after_clear_bit(); 5331da177e4SLinus Torvalds } 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds /* 5361da177e4SLinus Torvalds * Attempt to connect a TCP socket. 
*/
/*
 * Fails @task with -EIO if the transport is shutting down or has no port.
 * Otherwise takes the write lock (returning if that fails) and, unless the
 * transport is already connected, puts @task to sleep on xprt->pending with
 * xprt_connect_status() as its wake-up callback and queues the connect
 * work if XPRT_CONNECTING was not already set.
 * An already connected transport just gets its write lock released.
 */
void xprt_connect(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;

	dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
			xprt, (xprt_connected(xprt) ? "is" : "is not"));

	if (xprt->shutdown || !xprt->addr.sin_port) {
		task->tk_status = -EIO;
		return;
	}
	if (!xprt_lock_write(xprt, task))
		return;

	if (xprt_connected(xprt)) {
		xprt_release_write(xprt, task);
		return;
	}

	if (task->tk_rqstp)
		task->tk_rqstp->rq_bytes_sent = 0;

	task->tk_timeout = RPC_CONNECT_TIMEOUT;
	rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);

	/* XPRT_CONNECTING was already set: do not queue the work again. */
	if (test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate))
		return;

	/* Note: if we are here due to a dropped connection
	 *	 we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ
	 *	 seconds
	 */
	if (xprt->sock != NULL)
		schedule_delayed_work(&xprt->sock_connect,
				RPC_REESTABLISH_TIMEOUT);
	else
		schedule_work(&xprt->sock_connect);
}

/*
 * We arrive here when awoken from waiting on connection establishment.
 *
 * A non-negative tk_status means success and nothing else is done.  On
 * failure a soft task has its status forced to -EIO.  For -ECONNREFUSED,
 * -ECONNRESET and -ENOTCONN the function returns with the write lock still
 * held; for every other error the write lock is released.
 */
static void
xprt_connect_status(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;
	int status;

	if (task->tk_status >= 0) {
		dprintk("RPC: %4d xprt_connect_status: connection established\n",
				task->tk_pid);
		return;
	}

	/* if soft mounted, just cause this RPC to fail */
	if (RPC_IS_SOFT(task))
		task->tk_status = -EIO;

	status = task->tk_status;
	if (status == -ECONNREFUSED || status == -ECONNRESET ||
	    status == -ENOTCONN)
		return;

	if (status == -ETIMEDOUT)
		dprintk("RPC: %4d xprt_connect_status: timed out\n",
				task->tk_pid);
	else
		printk(KERN_ERR "RPC: error %d connecting to server %s\n",
				-task->tk_status, task->tk_client->cl_server);

	xprt_release_write(xprt, task);
}

/*
 * Look up the RPC request corresponding to a reply, and then lock it.
6161da177e4SLinus Torvalds */ 6171da177e4SLinus Torvalds static inline struct rpc_rqst * 6181da177e4SLinus Torvalds xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) 6191da177e4SLinus Torvalds { 6201da177e4SLinus Torvalds struct list_head *pos; 6211da177e4SLinus Torvalds struct rpc_rqst *req = NULL; 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds list_for_each(pos, &xprt->recv) { 6241da177e4SLinus Torvalds struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); 6251da177e4SLinus Torvalds if (entry->rq_xid == xid) { 6261da177e4SLinus Torvalds req = entry; 6271da177e4SLinus Torvalds break; 6281da177e4SLinus Torvalds } 6291da177e4SLinus Torvalds } 6301da177e4SLinus Torvalds return req; 6311da177e4SLinus Torvalds } 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds /* 6341da177e4SLinus Torvalds * Complete reply received. 6351da177e4SLinus Torvalds * The TCP code relies on us to remove the request from xprt->pending. 6361da177e4SLinus Torvalds */ 6371da177e4SLinus Torvalds static void 6381da177e4SLinus Torvalds xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) 6391da177e4SLinus Torvalds { 6401da177e4SLinus Torvalds struct rpc_task *task = req->rq_task; 6411da177e4SLinus Torvalds struct rpc_clnt *clnt = task->tk_client; 6421da177e4SLinus Torvalds 6431da177e4SLinus Torvalds /* Adjust congestion window */ 6441da177e4SLinus Torvalds if (!xprt->nocong) { 6451da177e4SLinus Torvalds unsigned timer = task->tk_msg.rpc_proc->p_timer; 6461da177e4SLinus Torvalds xprt_adjust_cwnd(xprt, copied); 6471da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 6481da177e4SLinus Torvalds if (timer) { 6491da177e4SLinus Torvalds if (req->rq_ntrans == 1) 6501da177e4SLinus Torvalds rpc_update_rtt(clnt->cl_rtt, timer, 6511da177e4SLinus Torvalds (long)jiffies - req->rq_xtime); 6521da177e4SLinus Torvalds rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); 6531da177e4SLinus Torvalds } 6541da177e4SLinus Torvalds } 6551da177e4SLinus Torvalds 
6561da177e4SLinus Torvalds #ifdef RPC_PROFILE 6571da177e4SLinus Torvalds /* Profile only reads for now */ 6581da177e4SLinus Torvalds if (copied > 1024) { 6591da177e4SLinus Torvalds static unsigned long nextstat; 6601da177e4SLinus Torvalds static unsigned long pkt_rtt, pkt_len, pkt_cnt; 6611da177e4SLinus Torvalds 6621da177e4SLinus Torvalds pkt_cnt++; 6631da177e4SLinus Torvalds pkt_len += req->rq_slen + copied; 6641da177e4SLinus Torvalds pkt_rtt += jiffies - req->rq_xtime; 6651da177e4SLinus Torvalds if (time_before(nextstat, jiffies)) { 6661da177e4SLinus Torvalds printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); 6671da177e4SLinus Torvalds printk("RPC: %ld %ld %ld %ld stat\n", 6681da177e4SLinus Torvalds jiffies, pkt_cnt, pkt_len, pkt_rtt); 6691da177e4SLinus Torvalds pkt_rtt = pkt_len = pkt_cnt = 0; 6701da177e4SLinus Torvalds nextstat = jiffies + 5 * HZ; 6711da177e4SLinus Torvalds } 6721da177e4SLinus Torvalds } 6731da177e4SLinus Torvalds #endif 6741da177e4SLinus Torvalds 6751da177e4SLinus Torvalds dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); 6761da177e4SLinus Torvalds list_del_init(&req->rq_list); 6771da177e4SLinus Torvalds req->rq_received = req->rq_private_buf.len = copied; 6781da177e4SLinus Torvalds 6791da177e4SLinus Torvalds /* ... and wake up the process. 
*/ 6801da177e4SLinus Torvalds rpc_wake_up_task(task); 6811da177e4SLinus Torvalds return; 6821da177e4SLinus Torvalds } 6831da177e4SLinus Torvalds 6841da177e4SLinus Torvalds static size_t 6851da177e4SLinus Torvalds skb_read_bits(skb_reader_t *desc, void *to, size_t len) 6861da177e4SLinus Torvalds { 6871da177e4SLinus Torvalds if (len > desc->count) 6881da177e4SLinus Torvalds len = desc->count; 6891da177e4SLinus Torvalds if (skb_copy_bits(desc->skb, desc->offset, to, len)) 6901da177e4SLinus Torvalds return 0; 6911da177e4SLinus Torvalds desc->count -= len; 6921da177e4SLinus Torvalds desc->offset += len; 6931da177e4SLinus Torvalds return len; 6941da177e4SLinus Torvalds } 6951da177e4SLinus Torvalds 6961da177e4SLinus Torvalds static size_t 6971da177e4SLinus Torvalds skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) 6981da177e4SLinus Torvalds { 6991da177e4SLinus Torvalds unsigned int csum2, pos; 7001da177e4SLinus Torvalds 7011da177e4SLinus Torvalds if (len > desc->count) 7021da177e4SLinus Torvalds len = desc->count; 7031da177e4SLinus Torvalds pos = desc->offset; 7041da177e4SLinus Torvalds csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); 7051da177e4SLinus Torvalds desc->csum = csum_block_add(desc->csum, csum2, pos); 7061da177e4SLinus Torvalds desc->count -= len; 7071da177e4SLinus Torvalds desc->offset += len; 7081da177e4SLinus Torvalds return len; 7091da177e4SLinus Torvalds } 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds /* 7121da177e4SLinus Torvalds * We have set things up such that we perform the checksum of the UDP 7131da177e4SLinus Torvalds * packet in parallel with the copies into the RPC client iovec. 
-DaveM 7141da177e4SLinus Torvalds */ 7151da177e4SLinus Torvalds int 7161da177e4SLinus Torvalds csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) 7171da177e4SLinus Torvalds { 7181da177e4SLinus Torvalds skb_reader_t desc; 7191da177e4SLinus Torvalds 7201da177e4SLinus Torvalds desc.skb = skb; 7211da177e4SLinus Torvalds desc.offset = sizeof(struct udphdr); 7221da177e4SLinus Torvalds desc.count = skb->len - desc.offset; 7231da177e4SLinus Torvalds 7241da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_UNNECESSARY) 7251da177e4SLinus Torvalds goto no_checksum; 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds desc.csum = csum_partial(skb->data, desc.offset, skb->csum); 7281da177e4SLinus Torvalds xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); 7291da177e4SLinus Torvalds if (desc.offset != skb->len) { 7301da177e4SLinus Torvalds unsigned int csum2; 7311da177e4SLinus Torvalds csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); 7321da177e4SLinus Torvalds desc.csum = csum_block_add(desc.csum, csum2, desc.offset); 7331da177e4SLinus Torvalds } 7341da177e4SLinus Torvalds if (desc.count) 7351da177e4SLinus Torvalds return -1; 7361da177e4SLinus Torvalds if ((unsigned short)csum_fold(desc.csum)) 7371da177e4SLinus Torvalds return -1; 7381da177e4SLinus Torvalds return 0; 7391da177e4SLinus Torvalds no_checksum: 7401da177e4SLinus Torvalds xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); 7411da177e4SLinus Torvalds if (desc.count) 7421da177e4SLinus Torvalds return -1; 7431da177e4SLinus Torvalds return 0; 7441da177e4SLinus Torvalds } 7451da177e4SLinus Torvalds 7461da177e4SLinus Torvalds /* 7471da177e4SLinus Torvalds * Input handler for RPC replies. Called from a bottom half and hence 7481da177e4SLinus Torvalds * atomic. 
7491da177e4SLinus Torvalds */ 7501da177e4SLinus Torvalds static void 7511da177e4SLinus Torvalds udp_data_ready(struct sock *sk, int len) 7521da177e4SLinus Torvalds { 7531da177e4SLinus Torvalds struct rpc_task *task; 7541da177e4SLinus Torvalds struct rpc_xprt *xprt; 7551da177e4SLinus Torvalds struct rpc_rqst *rovr; 7561da177e4SLinus Torvalds struct sk_buff *skb; 7571da177e4SLinus Torvalds int err, repsize, copied; 7581da177e4SLinus Torvalds u32 _xid, *xp; 7591da177e4SLinus Torvalds 7601da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 7611da177e4SLinus Torvalds dprintk("RPC: udp_data_ready...\n"); 7621da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) { 7631da177e4SLinus Torvalds printk("RPC: udp_data_ready request not found!\n"); 7641da177e4SLinus Torvalds goto out; 7651da177e4SLinus Torvalds } 7661da177e4SLinus Torvalds 7671da177e4SLinus Torvalds dprintk("RPC: udp_data_ready client %p\n", xprt); 7681da177e4SLinus Torvalds 7691da177e4SLinus Torvalds if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) 7701da177e4SLinus Torvalds goto out; 7711da177e4SLinus Torvalds 7721da177e4SLinus Torvalds if (xprt->shutdown) 7731da177e4SLinus Torvalds goto dropit; 7741da177e4SLinus Torvalds 7751da177e4SLinus Torvalds repsize = skb->len - sizeof(struct udphdr); 7761da177e4SLinus Torvalds if (repsize < 4) { 7771da177e4SLinus Torvalds printk("RPC: impossible RPC reply size %d!\n", repsize); 7781da177e4SLinus Torvalds goto dropit; 7791da177e4SLinus Torvalds } 7801da177e4SLinus Torvalds 7811da177e4SLinus Torvalds /* Copy the XID from the skb... 
*/ 7821da177e4SLinus Torvalds xp = skb_header_pointer(skb, sizeof(struct udphdr), 7831da177e4SLinus Torvalds sizeof(_xid), &_xid); 7841da177e4SLinus Torvalds if (xp == NULL) 7851da177e4SLinus Torvalds goto dropit; 7861da177e4SLinus Torvalds 7871da177e4SLinus Torvalds /* Look up and lock the request corresponding to the given XID */ 7881da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 7891da177e4SLinus Torvalds rovr = xprt_lookup_rqst(xprt, *xp); 7901da177e4SLinus Torvalds if (!rovr) 7911da177e4SLinus Torvalds goto out_unlock; 7921da177e4SLinus Torvalds task = rovr->rq_task; 7931da177e4SLinus Torvalds 7941da177e4SLinus Torvalds dprintk("RPC: %4d received reply\n", task->tk_pid); 7951da177e4SLinus Torvalds 7961da177e4SLinus Torvalds if ((copied = rovr->rq_private_buf.buflen) > repsize) 7971da177e4SLinus Torvalds copied = repsize; 7981da177e4SLinus Torvalds 7991da177e4SLinus Torvalds /* Suck it into the iovec, verify checksum if not done by hw. */ 8001da177e4SLinus Torvalds if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) 8011da177e4SLinus Torvalds goto out_unlock; 8021da177e4SLinus Torvalds 8031da177e4SLinus Torvalds /* Something worked... */ 8041da177e4SLinus Torvalds dst_confirm(skb->dst); 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds xprt_complete_rqst(xprt, rovr, copied); 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds out_unlock: 8091da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 8101da177e4SLinus Torvalds dropit: 8111da177e4SLinus Torvalds skb_free_datagram(sk, skb); 8121da177e4SLinus Torvalds out: 8131da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 8141da177e4SLinus Torvalds } 8151da177e4SLinus Torvalds 8161da177e4SLinus Torvalds /* 8171da177e4SLinus Torvalds * Copy from an skb into memory and shrink the skb. 
8181da177e4SLinus Torvalds */ 8191da177e4SLinus Torvalds static inline size_t 8201da177e4SLinus Torvalds tcp_copy_data(skb_reader_t *desc, void *p, size_t len) 8211da177e4SLinus Torvalds { 8221da177e4SLinus Torvalds if (len > desc->count) 8231da177e4SLinus Torvalds len = desc->count; 8241da177e4SLinus Torvalds if (skb_copy_bits(desc->skb, desc->offset, p, len)) 8251da177e4SLinus Torvalds return 0; 8261da177e4SLinus Torvalds desc->offset += len; 8271da177e4SLinus Torvalds desc->count -= len; 8281da177e4SLinus Torvalds return len; 8291da177e4SLinus Torvalds } 8301da177e4SLinus Torvalds 8311da177e4SLinus Torvalds /* 8321da177e4SLinus Torvalds * TCP read fragment marker 8331da177e4SLinus Torvalds */ 8341da177e4SLinus Torvalds static inline void 8351da177e4SLinus Torvalds tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) 8361da177e4SLinus Torvalds { 8371da177e4SLinus Torvalds size_t len, used; 8381da177e4SLinus Torvalds char *p; 8391da177e4SLinus Torvalds 8401da177e4SLinus Torvalds p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; 8411da177e4SLinus Torvalds len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; 8421da177e4SLinus Torvalds used = tcp_copy_data(desc, p, len); 8431da177e4SLinus Torvalds xprt->tcp_offset += used; 8441da177e4SLinus Torvalds if (used != len) 8451da177e4SLinus Torvalds return; 8461da177e4SLinus Torvalds xprt->tcp_reclen = ntohl(xprt->tcp_recm); 8471da177e4SLinus Torvalds if (xprt->tcp_reclen & 0x80000000) 8481da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_LAST_FRAG; 8491da177e4SLinus Torvalds else 8501da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_LAST_FRAG; 8511da177e4SLinus Torvalds xprt->tcp_reclen &= 0x7fffffff; 8521da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_RECM; 8531da177e4SLinus Torvalds xprt->tcp_offset = 0; 8541da177e4SLinus Torvalds /* Sanity check of the record length */ 8551da177e4SLinus Torvalds if (xprt->tcp_reclen < 4) { 8561da177e4SLinus Torvalds printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); 
8571da177e4SLinus Torvalds xprt_disconnect(xprt); 8581da177e4SLinus Torvalds } 8591da177e4SLinus Torvalds dprintk("RPC: reading TCP record fragment of length %d\n", 8601da177e4SLinus Torvalds xprt->tcp_reclen); 8611da177e4SLinus Torvalds } 8621da177e4SLinus Torvalds 8631da177e4SLinus Torvalds static void 8641da177e4SLinus Torvalds tcp_check_recm(struct rpc_xprt *xprt) 8651da177e4SLinus Torvalds { 8661da177e4SLinus Torvalds if (xprt->tcp_offset == xprt->tcp_reclen) { 8671da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_RECM; 8681da177e4SLinus Torvalds xprt->tcp_offset = 0; 8691da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_LAST_FRAG) { 8701da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 8711da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_XID; 8721da177e4SLinus Torvalds xprt->tcp_copied = 0; 8731da177e4SLinus Torvalds } 8741da177e4SLinus Torvalds } 8751da177e4SLinus Torvalds } 8761da177e4SLinus Torvalds 8771da177e4SLinus Torvalds /* 8781da177e4SLinus Torvalds * TCP read xid 8791da177e4SLinus Torvalds */ 8801da177e4SLinus Torvalds static inline void 8811da177e4SLinus Torvalds tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) 8821da177e4SLinus Torvalds { 8831da177e4SLinus Torvalds size_t len, used; 8841da177e4SLinus Torvalds char *p; 8851da177e4SLinus Torvalds 8861da177e4SLinus Torvalds len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; 8871da177e4SLinus Torvalds dprintk("RPC: reading XID (%Zu bytes)\n", len); 8881da177e4SLinus Torvalds p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; 8891da177e4SLinus Torvalds used = tcp_copy_data(desc, p, len); 8901da177e4SLinus Torvalds xprt->tcp_offset += used; 8911da177e4SLinus Torvalds if (used != len) 8921da177e4SLinus Torvalds return; 8931da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_XID; 8941da177e4SLinus Torvalds xprt->tcp_flags |= XPRT_COPY_DATA; 8951da177e4SLinus Torvalds xprt->tcp_copied = 4; 8961da177e4SLinus Torvalds dprintk("RPC: reading reply for XID %08x\n", 8971da177e4SLinus Torvalds 
ntohl(xprt->tcp_xid)); 8981da177e4SLinus Torvalds tcp_check_recm(xprt); 8991da177e4SLinus Torvalds } 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds /* 9021da177e4SLinus Torvalds * TCP read and complete request 9031da177e4SLinus Torvalds */ 9041da177e4SLinus Torvalds static inline void 9051da177e4SLinus Torvalds tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) 9061da177e4SLinus Torvalds { 9071da177e4SLinus Torvalds struct rpc_rqst *req; 9081da177e4SLinus Torvalds struct xdr_buf *rcvbuf; 9091da177e4SLinus Torvalds size_t len; 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds /* Find and lock the request corresponding to this xid */ 9121da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 9131da177e4SLinus Torvalds req = xprt_lookup_rqst(xprt, xprt->tcp_xid); 9141da177e4SLinus Torvalds if (!req) { 9151da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9161da177e4SLinus Torvalds dprintk("RPC: XID %08x request not found!\n", 9171da177e4SLinus Torvalds ntohl(xprt->tcp_xid)); 9181da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 9191da177e4SLinus Torvalds return; 9201da177e4SLinus Torvalds } 9211da177e4SLinus Torvalds 9221da177e4SLinus Torvalds rcvbuf = &req->rq_private_buf; 9231da177e4SLinus Torvalds len = desc->count; 9241da177e4SLinus Torvalds if (len > xprt->tcp_reclen - xprt->tcp_offset) { 9251da177e4SLinus Torvalds skb_reader_t my_desc; 9261da177e4SLinus Torvalds 9271da177e4SLinus Torvalds len = xprt->tcp_reclen - xprt->tcp_offset; 9281da177e4SLinus Torvalds memcpy(&my_desc, desc, sizeof(my_desc)); 9291da177e4SLinus Torvalds my_desc.count = len; 9301da177e4SLinus Torvalds xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 9311da177e4SLinus Torvalds &my_desc, tcp_copy_data); 9321da177e4SLinus Torvalds desc->count -= len; 9331da177e4SLinus Torvalds desc->offset += len; 9341da177e4SLinus Torvalds } else 9351da177e4SLinus Torvalds xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 9361da177e4SLinus Torvalds desc, tcp_copy_data); 
9371da177e4SLinus Torvalds xprt->tcp_copied += len; 9381da177e4SLinus Torvalds xprt->tcp_offset += len; 9391da177e4SLinus Torvalds 9401da177e4SLinus Torvalds if (xprt->tcp_copied == req->rq_private_buf.buflen) 9411da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9421da177e4SLinus Torvalds else if (xprt->tcp_offset == xprt->tcp_reclen) { 9431da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_LAST_FRAG) 9441da177e4SLinus Torvalds xprt->tcp_flags &= ~XPRT_COPY_DATA; 9451da177e4SLinus Torvalds } 9461da177e4SLinus Torvalds 9471da177e4SLinus Torvalds if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { 9481da177e4SLinus Torvalds dprintk("RPC: %4d received reply complete\n", 9491da177e4SLinus Torvalds req->rq_task->tk_pid); 9501da177e4SLinus Torvalds xprt_complete_rqst(xprt, req, xprt->tcp_copied); 9511da177e4SLinus Torvalds } 9521da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 9531da177e4SLinus Torvalds tcp_check_recm(xprt); 9541da177e4SLinus Torvalds } 9551da177e4SLinus Torvalds 9561da177e4SLinus Torvalds /* 9571da177e4SLinus Torvalds * TCP discard extra bytes from a short read 9581da177e4SLinus Torvalds */ 9591da177e4SLinus Torvalds static inline void 9601da177e4SLinus Torvalds tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) 9611da177e4SLinus Torvalds { 9621da177e4SLinus Torvalds size_t len; 9631da177e4SLinus Torvalds 9641da177e4SLinus Torvalds len = xprt->tcp_reclen - xprt->tcp_offset; 9651da177e4SLinus Torvalds if (len > desc->count) 9661da177e4SLinus Torvalds len = desc->count; 9671da177e4SLinus Torvalds desc->count -= len; 9681da177e4SLinus Torvalds desc->offset += len; 9691da177e4SLinus Torvalds xprt->tcp_offset += len; 9701da177e4SLinus Torvalds tcp_check_recm(xprt); 9711da177e4SLinus Torvalds } 9721da177e4SLinus Torvalds 9731da177e4SLinus Torvalds /* 9741da177e4SLinus Torvalds * TCP record receive routine 9751da177e4SLinus Torvalds * We first have to grab the record marker, then the XID, then the data. 
9761da177e4SLinus Torvalds */ 9771da177e4SLinus Torvalds static int 9781da177e4SLinus Torvalds tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, 9791da177e4SLinus Torvalds unsigned int offset, size_t len) 9801da177e4SLinus Torvalds { 9811da177e4SLinus Torvalds struct rpc_xprt *xprt = rd_desc->arg.data; 9821da177e4SLinus Torvalds skb_reader_t desc = { 9831da177e4SLinus Torvalds .skb = skb, 9841da177e4SLinus Torvalds .offset = offset, 9851da177e4SLinus Torvalds .count = len, 9861da177e4SLinus Torvalds .csum = 0 9871da177e4SLinus Torvalds }; 9881da177e4SLinus Torvalds 9891da177e4SLinus Torvalds dprintk("RPC: tcp_data_recv\n"); 9901da177e4SLinus Torvalds do { 9911da177e4SLinus Torvalds /* Read in a new fragment marker if necessary */ 9921da177e4SLinus Torvalds /* Can we ever really expect to get completely empty fragments? */ 9931da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_RECM) { 9941da177e4SLinus Torvalds tcp_read_fraghdr(xprt, &desc); 9951da177e4SLinus Torvalds continue; 9961da177e4SLinus Torvalds } 9971da177e4SLinus Torvalds /* Read in the xid if necessary */ 9981da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_XID) { 9991da177e4SLinus Torvalds tcp_read_xid(xprt, &desc); 10001da177e4SLinus Torvalds continue; 10011da177e4SLinus Torvalds } 10021da177e4SLinus Torvalds /* Read in the request data */ 10031da177e4SLinus Torvalds if (xprt->tcp_flags & XPRT_COPY_DATA) { 10041da177e4SLinus Torvalds tcp_read_request(xprt, &desc); 10051da177e4SLinus Torvalds continue; 10061da177e4SLinus Torvalds } 10071da177e4SLinus Torvalds /* Skip over any trailing bytes on short reads */ 10081da177e4SLinus Torvalds tcp_read_discard(xprt, &desc); 10091da177e4SLinus Torvalds } while (desc.count); 10101da177e4SLinus Torvalds dprintk("RPC: tcp_data_recv done\n"); 10111da177e4SLinus Torvalds return len - desc.count; 10121da177e4SLinus Torvalds } 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds static void tcp_data_ready(struct sock *sk, int bytes) 
10151da177e4SLinus Torvalds { 10161da177e4SLinus Torvalds struct rpc_xprt *xprt; 10171da177e4SLinus Torvalds read_descriptor_t rd_desc; 10181da177e4SLinus Torvalds 10191da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 10201da177e4SLinus Torvalds dprintk("RPC: tcp_data_ready...\n"); 10211da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) { 10221da177e4SLinus Torvalds printk("RPC: tcp_data_ready socket info not found!\n"); 10231da177e4SLinus Torvalds goto out; 10241da177e4SLinus Torvalds } 10251da177e4SLinus Torvalds if (xprt->shutdown) 10261da177e4SLinus Torvalds goto out; 10271da177e4SLinus Torvalds 10281da177e4SLinus Torvalds /* We use rd_desc to pass struct xprt to tcp_data_recv */ 10291da177e4SLinus Torvalds rd_desc.arg.data = xprt; 10301da177e4SLinus Torvalds rd_desc.count = 65536; 10311da177e4SLinus Torvalds tcp_read_sock(sk, &rd_desc, tcp_data_recv); 10321da177e4SLinus Torvalds out: 10331da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 10341da177e4SLinus Torvalds } 10351da177e4SLinus Torvalds 10361da177e4SLinus Torvalds static void 10371da177e4SLinus Torvalds tcp_state_change(struct sock *sk) 10381da177e4SLinus Torvalds { 10391da177e4SLinus Torvalds struct rpc_xprt *xprt; 10401da177e4SLinus Torvalds 10411da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 10421da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk))) 10431da177e4SLinus Torvalds goto out; 10441da177e4SLinus Torvalds dprintk("RPC: tcp_state_change client %p...\n", xprt); 10451da177e4SLinus Torvalds dprintk("RPC: state %x conn %d dead %d zapped %d\n", 10461da177e4SLinus Torvalds sk->sk_state, xprt_connected(xprt), 10471da177e4SLinus Torvalds sock_flag(sk, SOCK_DEAD), 10481da177e4SLinus Torvalds sock_flag(sk, SOCK_ZAPPED)); 10491da177e4SLinus Torvalds 10501da177e4SLinus Torvalds switch (sk->sk_state) { 10511da177e4SLinus Torvalds case TCP_ESTABLISHED: 10521da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 10531da177e4SLinus Torvalds if 
(!xprt_test_and_set_connected(xprt)) { 10541da177e4SLinus Torvalds /* Reset TCP record info */ 10551da177e4SLinus Torvalds xprt->tcp_offset = 0; 10561da177e4SLinus Torvalds xprt->tcp_reclen = 0; 10571da177e4SLinus Torvalds xprt->tcp_copied = 0; 10581da177e4SLinus Torvalds xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; 10591da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 10601da177e4SLinus Torvalds } 10611da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 10621da177e4SLinus Torvalds break; 10631da177e4SLinus Torvalds case TCP_SYN_SENT: 10641da177e4SLinus Torvalds case TCP_SYN_RECV: 10651da177e4SLinus Torvalds break; 10661da177e4SLinus Torvalds default: 10671da177e4SLinus Torvalds if (xprt_test_and_clear_connected(xprt)) 10681da177e4SLinus Torvalds rpc_wake_up_status(&xprt->pending, -ENOTCONN); 10691da177e4SLinus Torvalds break; 10701da177e4SLinus Torvalds } 10711da177e4SLinus Torvalds out: 10721da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 10731da177e4SLinus Torvalds } 10741da177e4SLinus Torvalds 10751da177e4SLinus Torvalds /* 10761da177e4SLinus Torvalds * Called when more output buffer space is available for this socket. 10771da177e4SLinus Torvalds * We try not to wake our writers until they can make "significant" 10781da177e4SLinus Torvalds * progress, otherwise we'll waste resources thrashing sock_sendmsg 10791da177e4SLinus Torvalds * with a bunch of small requests. 
10801da177e4SLinus Torvalds */ 10811da177e4SLinus Torvalds static void 10821da177e4SLinus Torvalds xprt_write_space(struct sock *sk) 10831da177e4SLinus Torvalds { 10841da177e4SLinus Torvalds struct rpc_xprt *xprt; 10851da177e4SLinus Torvalds struct socket *sock; 10861da177e4SLinus Torvalds 10871da177e4SLinus Torvalds read_lock(&sk->sk_callback_lock); 10881da177e4SLinus Torvalds if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) 10891da177e4SLinus Torvalds goto out; 10901da177e4SLinus Torvalds if (xprt->shutdown) 10911da177e4SLinus Torvalds goto out; 10921da177e4SLinus Torvalds 10931da177e4SLinus Torvalds /* Wait until we have enough socket memory */ 10941da177e4SLinus Torvalds if (xprt->stream) { 10951da177e4SLinus Torvalds /* from net/core/stream.c:sk_stream_write_space */ 10961da177e4SLinus Torvalds if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) 10971da177e4SLinus Torvalds goto out; 10981da177e4SLinus Torvalds } else { 10991da177e4SLinus Torvalds /* from net/core/sock.c:sock_def_write_space */ 11001da177e4SLinus Torvalds if (!sock_writeable(sk)) 11011da177e4SLinus Torvalds goto out; 11021da177e4SLinus Torvalds } 11031da177e4SLinus Torvalds 11041da177e4SLinus Torvalds if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) 11051da177e4SLinus Torvalds goto out; 11061da177e4SLinus Torvalds 11071da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 11081da177e4SLinus Torvalds if (xprt->snd_task) 11091da177e4SLinus Torvalds rpc_wake_up_task(xprt->snd_task); 11101da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 11111da177e4SLinus Torvalds out: 11121da177e4SLinus Torvalds read_unlock(&sk->sk_callback_lock); 11131da177e4SLinus Torvalds } 11141da177e4SLinus Torvalds 11151da177e4SLinus Torvalds /* 11161da177e4SLinus Torvalds * RPC receive timeout handler. 
11171da177e4SLinus Torvalds */ 11181da177e4SLinus Torvalds static void 11191da177e4SLinus Torvalds xprt_timer(struct rpc_task *task) 11201da177e4SLinus Torvalds { 11211da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 11221da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 11231da177e4SLinus Torvalds 11241da177e4SLinus Torvalds spin_lock(&xprt->sock_lock); 11251da177e4SLinus Torvalds if (req->rq_received) 11261da177e4SLinus Torvalds goto out; 11271da177e4SLinus Torvalds 11281da177e4SLinus Torvalds xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); 11291da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 11301da177e4SLinus Torvalds 11311da177e4SLinus Torvalds dprintk("RPC: %4d xprt_timer (%s request)\n", 11321da177e4SLinus Torvalds task->tk_pid, req ? "pending" : "backlogged"); 11331da177e4SLinus Torvalds 11341da177e4SLinus Torvalds task->tk_status = -ETIMEDOUT; 11351da177e4SLinus Torvalds out: 11361da177e4SLinus Torvalds task->tk_timeout = 0; 11371da177e4SLinus Torvalds rpc_wake_up_task(task); 11381da177e4SLinus Torvalds spin_unlock(&xprt->sock_lock); 11391da177e4SLinus Torvalds } 11401da177e4SLinus Torvalds 11411da177e4SLinus Torvalds /* 11421da177e4SLinus Torvalds * Place the actual RPC call. 11431da177e4SLinus Torvalds * We have to copy the iovec because sendmsg fiddles with its contents. 
11441da177e4SLinus Torvalds */ 11451da177e4SLinus Torvalds int 11461da177e4SLinus Torvalds xprt_prepare_transmit(struct rpc_task *task) 11471da177e4SLinus Torvalds { 11481da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 11491da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 11501da177e4SLinus Torvalds int err = 0; 11511da177e4SLinus Torvalds 11521da177e4SLinus Torvalds dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); 11531da177e4SLinus Torvalds 11541da177e4SLinus Torvalds if (xprt->shutdown) 11551da177e4SLinus Torvalds return -EIO; 11561da177e4SLinus Torvalds 11571da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 11581da177e4SLinus Torvalds if (req->rq_received && !req->rq_bytes_sent) { 11591da177e4SLinus Torvalds err = req->rq_received; 11601da177e4SLinus Torvalds goto out_unlock; 11611da177e4SLinus Torvalds } 11621da177e4SLinus Torvalds if (!__xprt_lock_write(xprt, task)) { 11631da177e4SLinus Torvalds err = -EAGAIN; 11641da177e4SLinus Torvalds goto out_unlock; 11651da177e4SLinus Torvalds } 11661da177e4SLinus Torvalds 11671da177e4SLinus Torvalds if (!xprt_connected(xprt)) { 11681da177e4SLinus Torvalds err = -ENOTCONN; 11691da177e4SLinus Torvalds goto out_unlock; 11701da177e4SLinus Torvalds } 11711da177e4SLinus Torvalds out_unlock: 11721da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 11731da177e4SLinus Torvalds return err; 11741da177e4SLinus Torvalds } 11751da177e4SLinus Torvalds 11761da177e4SLinus Torvalds void 11771da177e4SLinus Torvalds xprt_transmit(struct rpc_task *task) 11781da177e4SLinus Torvalds { 11791da177e4SLinus Torvalds struct rpc_clnt *clnt = task->tk_client; 11801da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 11811da177e4SLinus Torvalds struct rpc_xprt *xprt = req->rq_xprt; 11821da177e4SLinus Torvalds int status, retry = 0; 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds 11851da177e4SLinus Torvalds dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 
11861da177e4SLinus Torvalds 11871da177e4SLinus Torvalds /* set up everything as needed. */ 11881da177e4SLinus Torvalds /* Write the record marker */ 11891da177e4SLinus Torvalds if (xprt->stream) { 11901da177e4SLinus Torvalds u32 *marker = req->rq_svec[0].iov_base; 11911da177e4SLinus Torvalds 11921da177e4SLinus Torvalds *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); 11931da177e4SLinus Torvalds } 11941da177e4SLinus Torvalds 11951da177e4SLinus Torvalds smp_rmb(); 11961da177e4SLinus Torvalds if (!req->rq_received) { 11971da177e4SLinus Torvalds if (list_empty(&req->rq_list)) { 11981da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 11991da177e4SLinus Torvalds /* Update the softirq receive buffer */ 12001da177e4SLinus Torvalds memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 12011da177e4SLinus Torvalds sizeof(req->rq_private_buf)); 12021da177e4SLinus Torvalds /* Add request to the receive list */ 12031da177e4SLinus Torvalds list_add_tail(&req->rq_list, &xprt->recv); 12041da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 12051da177e4SLinus Torvalds xprt_reset_majortimeo(req); 12061da177e4SLinus Torvalds } 12071da177e4SLinus Torvalds } else if (!req->rq_bytes_sent) 12081da177e4SLinus Torvalds return; 12091da177e4SLinus Torvalds 12101da177e4SLinus Torvalds /* Continue transmitting the packet/record. We must be careful 12111da177e4SLinus Torvalds * to cope with writespace callbacks arriving _after_ we have 12121da177e4SLinus Torvalds * called xprt_sendmsg(). 
12131da177e4SLinus Torvalds */ 12141da177e4SLinus Torvalds while (1) { 12151da177e4SLinus Torvalds req->rq_xtime = jiffies; 12161da177e4SLinus Torvalds status = xprt_sendmsg(xprt, req); 12171da177e4SLinus Torvalds 12181da177e4SLinus Torvalds if (status < 0) 12191da177e4SLinus Torvalds break; 12201da177e4SLinus Torvalds 12211da177e4SLinus Torvalds if (xprt->stream) { 12221da177e4SLinus Torvalds req->rq_bytes_sent += status; 12231da177e4SLinus Torvalds 12241da177e4SLinus Torvalds /* If we've sent the entire packet, immediately 12251da177e4SLinus Torvalds * reset the count of bytes sent. */ 12261da177e4SLinus Torvalds if (req->rq_bytes_sent >= req->rq_slen) { 12271da177e4SLinus Torvalds req->rq_bytes_sent = 0; 12281da177e4SLinus Torvalds goto out_receive; 12291da177e4SLinus Torvalds } 12301da177e4SLinus Torvalds } else { 12311da177e4SLinus Torvalds if (status >= req->rq_slen) 12321da177e4SLinus Torvalds goto out_receive; 12331da177e4SLinus Torvalds status = -EAGAIN; 12341da177e4SLinus Torvalds break; 12351da177e4SLinus Torvalds } 12361da177e4SLinus Torvalds 12371da177e4SLinus Torvalds dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", 12381da177e4SLinus Torvalds task->tk_pid, req->rq_slen - req->rq_bytes_sent, 12391da177e4SLinus Torvalds req->rq_slen); 12401da177e4SLinus Torvalds 12411da177e4SLinus Torvalds status = -EAGAIN; 12421da177e4SLinus Torvalds if (retry++ > 50) 12431da177e4SLinus Torvalds break; 12441da177e4SLinus Torvalds } 12451da177e4SLinus Torvalds 12461da177e4SLinus Torvalds /* Note: at this point, task->tk_sleeping has not yet been set, 12471da177e4SLinus Torvalds * hence there is no danger of the waking up task being put on 12481da177e4SLinus Torvalds * schedq, and being picked up by a parallel run of rpciod(). 
12491da177e4SLinus Torvalds */ 12501da177e4SLinus Torvalds task->tk_status = status; 12511da177e4SLinus Torvalds 12521da177e4SLinus Torvalds switch (status) { 12531da177e4SLinus Torvalds case -EAGAIN: 12541da177e4SLinus Torvalds if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { 12551da177e4SLinus Torvalds /* Protect against races with xprt_write_space */ 12561da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 12571da177e4SLinus Torvalds /* Don't race with disconnect */ 12581da177e4SLinus Torvalds if (!xprt_connected(xprt)) 12591da177e4SLinus Torvalds task->tk_status = -ENOTCONN; 12601da177e4SLinus Torvalds else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { 12611da177e4SLinus Torvalds task->tk_timeout = req->rq_timeout; 12621da177e4SLinus Torvalds rpc_sleep_on(&xprt->pending, task, NULL, NULL); 12631da177e4SLinus Torvalds } 12641da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 12651da177e4SLinus Torvalds return; 12661da177e4SLinus Torvalds } 12671da177e4SLinus Torvalds /* Keep holding the socket if it is blocked */ 12681da177e4SLinus Torvalds rpc_delay(task, HZ>>4); 12691da177e4SLinus Torvalds return; 12701da177e4SLinus Torvalds case -ECONNREFUSED: 12711da177e4SLinus Torvalds task->tk_timeout = RPC_REESTABLISH_TIMEOUT; 12721da177e4SLinus Torvalds rpc_sleep_on(&xprt->sending, task, NULL, NULL); 12731da177e4SLinus Torvalds case -ENOTCONN: 12741da177e4SLinus Torvalds return; 12751da177e4SLinus Torvalds default: 12761da177e4SLinus Torvalds if (xprt->stream) 12771da177e4SLinus Torvalds xprt_disconnect(xprt); 12781da177e4SLinus Torvalds } 12791da177e4SLinus Torvalds xprt_release_write(xprt, task); 12801da177e4SLinus Torvalds return; 12811da177e4SLinus Torvalds out_receive: 12821da177e4SLinus Torvalds dprintk("RPC: %4d xmit complete\n", task->tk_pid); 12831da177e4SLinus Torvalds /* Set the task's receive timeout value */ 12841da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 12851da177e4SLinus Torvalds if (!xprt->nocong) { 12861da177e4SLinus 
Torvalds int timer = task->tk_msg.rpc_proc->p_timer; 12871da177e4SLinus Torvalds task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); 12881da177e4SLinus Torvalds task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; 12891da177e4SLinus Torvalds if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) 12901da177e4SLinus Torvalds task->tk_timeout = xprt->timeout.to_maxval; 12911da177e4SLinus Torvalds } else 12921da177e4SLinus Torvalds task->tk_timeout = req->rq_timeout; 12931da177e4SLinus Torvalds /* Don't race with disconnect */ 12941da177e4SLinus Torvalds if (!xprt_connected(xprt)) 12951da177e4SLinus Torvalds task->tk_status = -ENOTCONN; 12961da177e4SLinus Torvalds else if (!req->rq_received) 12971da177e4SLinus Torvalds rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); 12981da177e4SLinus Torvalds __xprt_release_write(xprt, task); 12991da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 13001da177e4SLinus Torvalds } 13011da177e4SLinus Torvalds 13021da177e4SLinus Torvalds /* 13031da177e4SLinus Torvalds * Reserve an RPC call slot. 
13041da177e4SLinus Torvalds */ 13051da177e4SLinus Torvalds static inline void 13061da177e4SLinus Torvalds do_xprt_reserve(struct rpc_task *task) 13071da177e4SLinus Torvalds { 13081da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 13091da177e4SLinus Torvalds 13101da177e4SLinus Torvalds task->tk_status = 0; 13111da177e4SLinus Torvalds if (task->tk_rqstp) 13121da177e4SLinus Torvalds return; 13131da177e4SLinus Torvalds if (!list_empty(&xprt->free)) { 13141da177e4SLinus Torvalds struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 13151da177e4SLinus Torvalds list_del_init(&req->rq_list); 13161da177e4SLinus Torvalds task->tk_rqstp = req; 13171da177e4SLinus Torvalds xprt_request_init(task, xprt); 13181da177e4SLinus Torvalds return; 13191da177e4SLinus Torvalds } 13201da177e4SLinus Torvalds dprintk("RPC: waiting for request slot\n"); 13211da177e4SLinus Torvalds task->tk_status = -EAGAIN; 13221da177e4SLinus Torvalds task->tk_timeout = 0; 13231da177e4SLinus Torvalds rpc_sleep_on(&xprt->backlog, task, NULL, NULL); 13241da177e4SLinus Torvalds } 13251da177e4SLinus Torvalds 13261da177e4SLinus Torvalds void 13271da177e4SLinus Torvalds xprt_reserve(struct rpc_task *task) 13281da177e4SLinus Torvalds { 13291da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 13301da177e4SLinus Torvalds 13311da177e4SLinus Torvalds task->tk_status = -EIO; 13321da177e4SLinus Torvalds if (!xprt->shutdown) { 13331da177e4SLinus Torvalds spin_lock(&xprt->xprt_lock); 13341da177e4SLinus Torvalds do_xprt_reserve(task); 13351da177e4SLinus Torvalds spin_unlock(&xprt->xprt_lock); 13361da177e4SLinus Torvalds if (task->tk_rqstp) 13371da177e4SLinus Torvalds del_timer_sync(&xprt->timer); 13381da177e4SLinus Torvalds } 13391da177e4SLinus Torvalds } 13401da177e4SLinus Torvalds 13411da177e4SLinus Torvalds /* 13421da177e4SLinus Torvalds * Allocate a 'unique' XID 13431da177e4SLinus Torvalds */ 13441da177e4SLinus Torvalds static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) 
13451da177e4SLinus Torvalds { 13461da177e4SLinus Torvalds return xprt->xid++; 13471da177e4SLinus Torvalds } 13481da177e4SLinus Torvalds 13491da177e4SLinus Torvalds static inline void xprt_init_xid(struct rpc_xprt *xprt) 13501da177e4SLinus Torvalds { 13511da177e4SLinus Torvalds get_random_bytes(&xprt->xid, sizeof(xprt->xid)); 13521da177e4SLinus Torvalds } 13531da177e4SLinus Torvalds 13541da177e4SLinus Torvalds /* 13551da177e4SLinus Torvalds * Initialize RPC request 13561da177e4SLinus Torvalds */ 13571da177e4SLinus Torvalds static void 13581da177e4SLinus Torvalds xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) 13591da177e4SLinus Torvalds { 13601da177e4SLinus Torvalds struct rpc_rqst *req = task->tk_rqstp; 13611da177e4SLinus Torvalds 13621da177e4SLinus Torvalds req->rq_timeout = xprt->timeout.to_initval; 13631da177e4SLinus Torvalds req->rq_task = task; 13641da177e4SLinus Torvalds req->rq_xprt = xprt; 13651da177e4SLinus Torvalds req->rq_xid = xprt_alloc_xid(xprt); 13661da177e4SLinus Torvalds dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 13671da177e4SLinus Torvalds req, ntohl(req->rq_xid)); 13681da177e4SLinus Torvalds } 13691da177e4SLinus Torvalds 13701da177e4SLinus Torvalds /* 13711da177e4SLinus Torvalds * Release an RPC call slot 13721da177e4SLinus Torvalds */ 13731da177e4SLinus Torvalds void 13741da177e4SLinus Torvalds xprt_release(struct rpc_task *task) 13751da177e4SLinus Torvalds { 13761da177e4SLinus Torvalds struct rpc_xprt *xprt = task->tk_xprt; 13771da177e4SLinus Torvalds struct rpc_rqst *req; 13781da177e4SLinus Torvalds 13791da177e4SLinus Torvalds if (!(req = task->tk_rqstp)) 13801da177e4SLinus Torvalds return; 13811da177e4SLinus Torvalds spin_lock_bh(&xprt->sock_lock); 13821da177e4SLinus Torvalds __xprt_release_write(xprt, task); 13831da177e4SLinus Torvalds __xprt_put_cong(xprt, req); 13841da177e4SLinus Torvalds if (!list_empty(&req->rq_list)) 13851da177e4SLinus Torvalds list_del(&req->rq_list); 13861da177e4SLinus Torvalds 
xprt->last_used = jiffies; 13871da177e4SLinus Torvalds if (list_empty(&xprt->recv) && !xprt->shutdown) 13881da177e4SLinus Torvalds mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); 13891da177e4SLinus Torvalds spin_unlock_bh(&xprt->sock_lock); 13901da177e4SLinus Torvalds task->tk_rqstp = NULL; 13911da177e4SLinus Torvalds memset(req, 0, sizeof(*req)); /* mark unused */ 13921da177e4SLinus Torvalds 13931da177e4SLinus Torvalds dprintk("RPC: %4d release request %p\n", task->tk_pid, req); 13941da177e4SLinus Torvalds 13951da177e4SLinus Torvalds spin_lock(&xprt->xprt_lock); 13961da177e4SLinus Torvalds list_add(&req->rq_list, &xprt->free); 13971da177e4SLinus Torvalds xprt_clear_backlog(xprt); 13981da177e4SLinus Torvalds spin_unlock(&xprt->xprt_lock); 13991da177e4SLinus Torvalds } 14001da177e4SLinus Torvalds 14011da177e4SLinus Torvalds /* 14021da177e4SLinus Torvalds * Set default timeout parameters 14031da177e4SLinus Torvalds */ 14041da177e4SLinus Torvalds static void 14051da177e4SLinus Torvalds xprt_default_timeout(struct rpc_timeout *to, int proto) 14061da177e4SLinus Torvalds { 14071da177e4SLinus Torvalds if (proto == IPPROTO_UDP) 14081da177e4SLinus Torvalds xprt_set_timeout(to, 5, 5 * HZ); 14091da177e4SLinus Torvalds else 14101da177e4SLinus Torvalds xprt_set_timeout(to, 5, 60 * HZ); 14111da177e4SLinus Torvalds } 14121da177e4SLinus Torvalds 14131da177e4SLinus Torvalds /* 14141da177e4SLinus Torvalds * Set constant timeout 14151da177e4SLinus Torvalds */ 14161da177e4SLinus Torvalds void 14171da177e4SLinus Torvalds xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) 14181da177e4SLinus Torvalds { 14191da177e4SLinus Torvalds to->to_initval = 14201da177e4SLinus Torvalds to->to_increment = incr; 14211da177e4SLinus Torvalds to->to_maxval = incr * retr; 14221da177e4SLinus Torvalds to->to_retries = retr; 14231da177e4SLinus Torvalds to->to_exponential = 0; 14241da177e4SLinus Torvalds } 14251da177e4SLinus Torvalds 14261da177e4SLinus Torvalds 
unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 14271da177e4SLinus Torvalds unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; 14281da177e4SLinus Torvalds 14291da177e4SLinus Torvalds /* 14301da177e4SLinus Torvalds * Initialize an RPC client 14311da177e4SLinus Torvalds */ 14321da177e4SLinus Torvalds static struct rpc_xprt * 14331da177e4SLinus Torvalds xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) 14341da177e4SLinus Torvalds { 14351da177e4SLinus Torvalds struct rpc_xprt *xprt; 14361da177e4SLinus Torvalds unsigned int entries; 14371da177e4SLinus Torvalds size_t slot_table_size; 14381da177e4SLinus Torvalds struct rpc_rqst *req; 14391da177e4SLinus Torvalds 14401da177e4SLinus Torvalds dprintk("RPC: setting up %s transport...\n", 14411da177e4SLinus Torvalds proto == IPPROTO_UDP? "UDP" : "TCP"); 14421da177e4SLinus Torvalds 14431da177e4SLinus Torvalds entries = (proto == IPPROTO_TCP)? 14441da177e4SLinus Torvalds xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; 14451da177e4SLinus Torvalds 14461da177e4SLinus Torvalds if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) 14471da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 14481da177e4SLinus Torvalds memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ 14491da177e4SLinus Torvalds xprt->max_reqs = entries; 14501da177e4SLinus Torvalds slot_table_size = entries * sizeof(xprt->slot[0]); 14511da177e4SLinus Torvalds xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); 14521da177e4SLinus Torvalds if (xprt->slot == NULL) { 14531da177e4SLinus Torvalds kfree(xprt); 14541da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 14551da177e4SLinus Torvalds } 14561da177e4SLinus Torvalds memset(xprt->slot, 0, slot_table_size); 14571da177e4SLinus Torvalds 14581da177e4SLinus Torvalds xprt->addr = *ap; 14591da177e4SLinus Torvalds xprt->prot = proto; 14601da177e4SLinus Torvalds xprt->stream = (proto == IPPROTO_TCP)? 
1 : 0; 14611da177e4SLinus Torvalds if (xprt->stream) { 14621da177e4SLinus Torvalds xprt->cwnd = RPC_MAXCWND(xprt); 14631da177e4SLinus Torvalds xprt->nocong = 1; 14641da177e4SLinus Torvalds xprt->max_payload = (1U << 31) - 1; 14651da177e4SLinus Torvalds } else { 14661da177e4SLinus Torvalds xprt->cwnd = RPC_INITCWND; 14671da177e4SLinus Torvalds xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 14681da177e4SLinus Torvalds } 14691da177e4SLinus Torvalds spin_lock_init(&xprt->sock_lock); 14701da177e4SLinus Torvalds spin_lock_init(&xprt->xprt_lock); 14711da177e4SLinus Torvalds init_waitqueue_head(&xprt->cong_wait); 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds INIT_LIST_HEAD(&xprt->free); 14741da177e4SLinus Torvalds INIT_LIST_HEAD(&xprt->recv); 14751da177e4SLinus Torvalds INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); 14761da177e4SLinus Torvalds INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); 14771da177e4SLinus Torvalds init_timer(&xprt->timer); 14781da177e4SLinus Torvalds xprt->timer.function = xprt_init_autodisconnect; 14791da177e4SLinus Torvalds xprt->timer.data = (unsigned long) xprt; 14801da177e4SLinus Torvalds xprt->last_used = jiffies; 14811da177e4SLinus Torvalds xprt->port = XPRT_MAX_RESVPORT; 14821da177e4SLinus Torvalds 14831da177e4SLinus Torvalds /* Set timeout parameters */ 14841da177e4SLinus Torvalds if (to) { 14851da177e4SLinus Torvalds xprt->timeout = *to; 14861da177e4SLinus Torvalds } else 14871da177e4SLinus Torvalds xprt_default_timeout(&xprt->timeout, xprt->prot); 14881da177e4SLinus Torvalds 14891da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 14901da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->sending, "xprt_sending"); 14911da177e4SLinus Torvalds rpc_init_wait_queue(&xprt->resend, "xprt_resend"); 14921da177e4SLinus Torvalds rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 14931da177e4SLinus Torvalds 14941da177e4SLinus Torvalds /* initialize free list */ 14951da177e4SLinus 
Torvalds for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) 14961da177e4SLinus Torvalds list_add(&req->rq_list, &xprt->free); 14971da177e4SLinus Torvalds 14981da177e4SLinus Torvalds xprt_init_xid(xprt); 14991da177e4SLinus Torvalds 15001da177e4SLinus Torvalds /* Check whether we want to use a reserved port */ 15011da177e4SLinus Torvalds xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; 15021da177e4SLinus Torvalds 15031da177e4SLinus Torvalds dprintk("RPC: created transport %p with %u slots\n", xprt, 15041da177e4SLinus Torvalds xprt->max_reqs); 15051da177e4SLinus Torvalds 15061da177e4SLinus Torvalds return xprt; 15071da177e4SLinus Torvalds } 15081da177e4SLinus Torvalds 15091da177e4SLinus Torvalds /* 15101da177e4SLinus Torvalds * Bind to a reserved port 15111da177e4SLinus Torvalds */ 15121da177e4SLinus Torvalds static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 15131da177e4SLinus Torvalds { 15141da177e4SLinus Torvalds struct sockaddr_in myaddr = { 15151da177e4SLinus Torvalds .sin_family = AF_INET, 15161da177e4SLinus Torvalds }; 15171da177e4SLinus Torvalds int err, port; 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds /* Were we already bound to a given port? 
Try to reuse it */ 15201da177e4SLinus Torvalds port = xprt->port; 15211da177e4SLinus Torvalds do { 15221da177e4SLinus Torvalds myaddr.sin_port = htons(port); 15231da177e4SLinus Torvalds err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, 15241da177e4SLinus Torvalds sizeof(myaddr)); 15251da177e4SLinus Torvalds if (err == 0) { 15261da177e4SLinus Torvalds xprt->port = port; 15271da177e4SLinus Torvalds return 0; 15281da177e4SLinus Torvalds } 15291da177e4SLinus Torvalds if (--port == 0) 15301da177e4SLinus Torvalds port = XPRT_MAX_RESVPORT; 15311da177e4SLinus Torvalds } while (err == -EADDRINUSE && port != xprt->port); 15321da177e4SLinus Torvalds 15331da177e4SLinus Torvalds printk("RPC: Can't bind to reserved port (%d).\n", -err); 15341da177e4SLinus Torvalds return err; 15351da177e4SLinus Torvalds } 15361da177e4SLinus Torvalds 15371da177e4SLinus Torvalds static void 15381da177e4SLinus Torvalds xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) 15391da177e4SLinus Torvalds { 15401da177e4SLinus Torvalds struct sock *sk = sock->sk; 15411da177e4SLinus Torvalds 15421da177e4SLinus Torvalds if (xprt->inet) 15431da177e4SLinus Torvalds return; 15441da177e4SLinus Torvalds 15451da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 15461da177e4SLinus Torvalds sk->sk_user_data = xprt; 15471da177e4SLinus Torvalds xprt->old_data_ready = sk->sk_data_ready; 15481da177e4SLinus Torvalds xprt->old_state_change = sk->sk_state_change; 15491da177e4SLinus Torvalds xprt->old_write_space = sk->sk_write_space; 15501da177e4SLinus Torvalds if (xprt->prot == IPPROTO_UDP) { 15511da177e4SLinus Torvalds sk->sk_data_ready = udp_data_ready; 15521da177e4SLinus Torvalds sk->sk_no_check = UDP_CSUM_NORCV; 15531da177e4SLinus Torvalds xprt_set_connected(xprt); 15541da177e4SLinus Torvalds } else { 15551da177e4SLinus Torvalds tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ 15561da177e4SLinus Torvalds sk->sk_data_ready = tcp_data_ready; 15571da177e4SLinus Torvalds sk->sk_state_change 
= tcp_state_change; 15581da177e4SLinus Torvalds xprt_clear_connected(xprt); 15591da177e4SLinus Torvalds } 15601da177e4SLinus Torvalds sk->sk_write_space = xprt_write_space; 15611da177e4SLinus Torvalds 15621da177e4SLinus Torvalds /* Reset to new socket */ 15631da177e4SLinus Torvalds xprt->sock = sock; 15641da177e4SLinus Torvalds xprt->inet = sk; 15651da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 15661da177e4SLinus Torvalds 15671da177e4SLinus Torvalds return; 15681da177e4SLinus Torvalds } 15691da177e4SLinus Torvalds 15701da177e4SLinus Torvalds /* 15711da177e4SLinus Torvalds * Set socket buffer length 15721da177e4SLinus Torvalds */ 15731da177e4SLinus Torvalds void 15741da177e4SLinus Torvalds xprt_sock_setbufsize(struct rpc_xprt *xprt) 15751da177e4SLinus Torvalds { 15761da177e4SLinus Torvalds struct sock *sk = xprt->inet; 15771da177e4SLinus Torvalds 15781da177e4SLinus Torvalds if (xprt->stream) 15791da177e4SLinus Torvalds return; 15801da177e4SLinus Torvalds if (xprt->rcvsize) { 15811da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 15821da177e4SLinus Torvalds sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; 15831da177e4SLinus Torvalds } 15841da177e4SLinus Torvalds if (xprt->sndsize) { 15851da177e4SLinus Torvalds sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 15861da177e4SLinus Torvalds sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; 15871da177e4SLinus Torvalds sk->sk_write_space(sk); 15881da177e4SLinus Torvalds } 15891da177e4SLinus Torvalds } 15901da177e4SLinus Torvalds 15911da177e4SLinus Torvalds /* 15921da177e4SLinus Torvalds * Datastream sockets are created here, but xprt_connect will create 15931da177e4SLinus Torvalds * and connect stream sockets. 
15941da177e4SLinus Torvalds */ 15951da177e4SLinus Torvalds static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) 15961da177e4SLinus Torvalds { 15971da177e4SLinus Torvalds struct socket *sock; 15981da177e4SLinus Torvalds int type, err; 15991da177e4SLinus Torvalds 16001da177e4SLinus Torvalds dprintk("RPC: xprt_create_socket(%s %d)\n", 16011da177e4SLinus Torvalds (proto == IPPROTO_UDP)? "udp" : "tcp", proto); 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 16041da177e4SLinus Torvalds 16051da177e4SLinus Torvalds if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { 16061da177e4SLinus Torvalds printk("RPC: can't create socket (%d).\n", -err); 16071da177e4SLinus Torvalds return NULL; 16081da177e4SLinus Torvalds } 16091da177e4SLinus Torvalds 16101da177e4SLinus Torvalds /* If the caller has the capability, bind to a reserved port */ 16111da177e4SLinus Torvalds if (resvport && xprt_bindresvport(xprt, sock) < 0) { 16121da177e4SLinus Torvalds printk("RPC: can't bind to reserved port.\n"); 16131da177e4SLinus Torvalds goto failed; 16141da177e4SLinus Torvalds } 16151da177e4SLinus Torvalds 16161da177e4SLinus Torvalds return sock; 16171da177e4SLinus Torvalds 16181da177e4SLinus Torvalds failed: 16191da177e4SLinus Torvalds sock_release(sock); 16201da177e4SLinus Torvalds return NULL; 16211da177e4SLinus Torvalds } 16221da177e4SLinus Torvalds 16231da177e4SLinus Torvalds /* 16241da177e4SLinus Torvalds * Create an RPC client transport given the protocol and peer address. 
16251da177e4SLinus Torvalds */ 16261da177e4SLinus Torvalds struct rpc_xprt * 16271da177e4SLinus Torvalds xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) 16281da177e4SLinus Torvalds { 16291da177e4SLinus Torvalds struct rpc_xprt *xprt; 16301da177e4SLinus Torvalds 16311da177e4SLinus Torvalds xprt = xprt_setup(proto, sap, to); 16321da177e4SLinus Torvalds if (IS_ERR(xprt)) 16331da177e4SLinus Torvalds dprintk("RPC: xprt_create_proto failed\n"); 16341da177e4SLinus Torvalds else 16351da177e4SLinus Torvalds dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); 16361da177e4SLinus Torvalds return xprt; 16371da177e4SLinus Torvalds } 16381da177e4SLinus Torvalds 16391da177e4SLinus Torvalds /* 16401da177e4SLinus Torvalds * Prepare for transport shutdown. 16411da177e4SLinus Torvalds */ 16421da177e4SLinus Torvalds static void 16431da177e4SLinus Torvalds xprt_shutdown(struct rpc_xprt *xprt) 16441da177e4SLinus Torvalds { 16451da177e4SLinus Torvalds xprt->shutdown = 1; 16461da177e4SLinus Torvalds rpc_wake_up(&xprt->sending); 16471da177e4SLinus Torvalds rpc_wake_up(&xprt->resend); 16481da177e4SLinus Torvalds rpc_wake_up(&xprt->pending); 16491da177e4SLinus Torvalds rpc_wake_up(&xprt->backlog); 16501da177e4SLinus Torvalds wake_up(&xprt->cong_wait); 16511da177e4SLinus Torvalds del_timer_sync(&xprt->timer); 16521da177e4SLinus Torvalds } 16531da177e4SLinus Torvalds 16541da177e4SLinus Torvalds /* 16551da177e4SLinus Torvalds * Clear the xprt backlog queue 16561da177e4SLinus Torvalds */ 16571da177e4SLinus Torvalds static int 16581da177e4SLinus Torvalds xprt_clear_backlog(struct rpc_xprt *xprt) { 16591da177e4SLinus Torvalds rpc_wake_up_next(&xprt->backlog); 16601da177e4SLinus Torvalds wake_up(&xprt->cong_wait); 16611da177e4SLinus Torvalds return 1; 16621da177e4SLinus Torvalds } 16631da177e4SLinus Torvalds 16641da177e4SLinus Torvalds /* 16651da177e4SLinus Torvalds * Destroy an RPC transport, killing off all requests. 
16661da177e4SLinus Torvalds */ 16671da177e4SLinus Torvalds int 16681da177e4SLinus Torvalds xprt_destroy(struct rpc_xprt *xprt) 16691da177e4SLinus Torvalds { 16701da177e4SLinus Torvalds dprintk("RPC: destroying transport %p\n", xprt); 16711da177e4SLinus Torvalds xprt_shutdown(xprt); 16721da177e4SLinus Torvalds xprt_disconnect(xprt); 16731da177e4SLinus Torvalds xprt_close(xprt); 16741da177e4SLinus Torvalds kfree(xprt->slot); 16751da177e4SLinus Torvalds kfree(xprt); 16761da177e4SLinus Torvalds 16771da177e4SLinus Torvalds return 0; 16781da177e4SLinus Torvalds } 1679