1 /* 2 * linux/net/sunrpc/xprt.c 3 * 4 * This is a generic RPC call interface supporting congestion avoidance, 5 * and asynchronous calls. 6 * 7 * The interface works like this: 8 * 9 * - When a process places a call, it allocates a request slot if 10 * one is available. Otherwise, it sleeps on the backlog queue 11 * (xprt_reserve). 12 * - Next, the caller puts together the RPC message, stuffs it into 13 * the request struct, and calls xprt_transmit(). 14 * - xprt_transmit sends the message and installs the caller on the 15 * transport's wait list. At the same time, if a reply is expected, 16 * it installs a timer that is run after the packet's timeout has 17 * expired. 18 * - When a packet arrives, the data_ready handler walks the list of 19 * pending requests for that transport. If a matching XID is found, the 20 * caller is woken up, and the timer removed. 21 * - When no reply arrives within the timeout interval, the timer is 22 * fired by the kernel and runs xprt_timer(). It either adjusts the 23 * timeout values (minor timeout) or wakes up the caller with a status 24 * of -ETIMEDOUT. 25 * - When the caller receives a notification from RPC that a reply arrived, 26 * it should release the RPC slot, and process the reply. 27 * If the call timed out, it may choose to retry the operation by 28 * adjusting the initial timeout value, and simply calling rpc_call 29 * again. 30 * 31 * Support for async RPC is done through a set of RPC-specific scheduling 32 * primitives that `transparently' work for processes as well as async 33 * tasks that rely on callbacks. 34 * 35 * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> 36 * 37 * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> 38 */ 39 40 #include <linux/module.h> 41 42 #include <linux/types.h> 43 #include <linux/interrupt.h> 44 #include <linux/workqueue.h> 45 #include <linux/net.h> 46 #include <linux/ktime.h> 47 48 #include <linux/sunrpc/clnt.h> 49 #include <linux/sunrpc/metrics.h> 50 #include <linux/sunrpc/bc_xprt.h> 51 52 #include "sunrpc.h" 53 54 /* 55 * Local variables 56 */ 57 58 #ifdef RPC_DEBUG 59 # define RPCDBG_FACILITY RPCDBG_XPRT 60 #endif 61 62 /* 63 * Local functions 64 */ 65 static void xprt_init(struct rpc_xprt *xprt, struct net *net); 66 static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 67 static void xprt_connect_status(struct rpc_task *task); 68 static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 69 static void xprt_destroy(struct rpc_xprt *xprt); 70 71 static DEFINE_SPINLOCK(xprt_list_lock); 72 static LIST_HEAD(xprt_list); 73 74 /* 75 * The transport code maintains an estimate on the maximum number of out- 76 * standing RPC requests, using a smoothed version of the congestion 77 * avoidance implemented in 44BSD. This is basically the Van Jacobson 78 * congestion algorithm: If a retransmit occurs, the congestion window is 79 * halved; otherwise, it is incremented by 1/cwnd when 80 * 81 * - a reply is received and 82 * - a full number of requests are outstanding and 83 * - the congestion window hasn't been updated recently. 84 */ 85 #define RPC_CWNDSHIFT (8U) 86 #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) 87 #define RPC_INITCWND RPC_CWNDSCALE 88 #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) 89 90 #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) 91 92 /** 93 * xprt_register_transport - register a transport implementation 94 * @transport: transport to register 95 * 96 * If a transport implementation is loaded as a kernel module, it can 97 * call this interface to make itself known to the RPC client. 98 * 99 * Returns: 100 * 0: transport successfully registered 101 * -EEXIST: transport already registered 102 * -EINVAL: transport module being unloaded 103 */ 104 int xprt_register_transport(struct xprt_class *transport) 105 { 106 struct xprt_class *t; 107 int result; 108 109 result = -EEXIST; 110 spin_lock(&xprt_list_lock); 111 list_for_each_entry(t, &xprt_list, list) { 112 /* don't register the same transport class twice */ 113 if (t->ident == transport->ident) 114 goto out; 115 } 116 117 list_add_tail(&transport->list, &xprt_list); 118 printk(KERN_INFO "RPC: Registered %s transport module.\n", 119 transport->name); 120 result = 0; 121 122 out: 123 spin_unlock(&xprt_list_lock); 124 return result; 125 } 126 EXPORT_SYMBOL_GPL(xprt_register_transport); 127 128 /** 129 * xprt_unregister_transport - unregister a transport implementation 130 * @transport: transport to unregister 131 * 132 * Returns: 133 * 0: transport successfully unregistered 134 * -ENOENT: transport never registered 135 */ 136 int xprt_unregister_transport(struct xprt_class *transport) 137 { 138 struct xprt_class *t; 139 int result; 140 141 result = 0; 142 spin_lock(&xprt_list_lock); 143 list_for_each_entry(t, &xprt_list, list) { 144 if (t == transport) { 145 printk(KERN_INFO 146 "RPC: Unregistered %s transport module.\n", 147 transport->name); 148 list_del_init(&transport->list); 149 goto out; 150 } 151 } 152 result = -ENOENT; 153 154 out: 155 spin_unlock(&xprt_list_lock); 156 return result; 157 } 158 EXPORT_SYMBOL_GPL(xprt_unregister_transport); 159 160 /** 161 * xprt_load_transport - load a transport implementation 162 * @transport_name: transport to load 163 * 164 * Returns: 165 * 0: transport successfully loaded 166 * -ENOENT: transport module not available 167 */ 168 int xprt_load_transport(const char *transport_name) 169 { 170 struct xprt_class *t; 171 int result; 172 173 result = 0; 174 spin_lock(&xprt_list_lock); 175 list_for_each_entry(t, &xprt_list, list) { 176 if (strcmp(t->name, transport_name) == 0) { 177 spin_unlock(&xprt_list_lock); 178 goto out; 179 } 180 } 181 spin_unlock(&xprt_list_lock); 182 result = request_module("xprt%s", transport_name); 183 out: 184 return result; 185 } 186 EXPORT_SYMBOL_GPL(xprt_load_transport); 187 188 /** 189 * xprt_reserve_xprt - serialize write access to transports 190 * @task: task that is requesting access to the transport 191 * @xprt: pointer to the target transport 192 * 193 * This prevents mixing the payload of separate requests, and prevents 194 * transport connects from colliding with writes. No congestion control 195 * is provided. 196 */ 197 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 198 { 199 struct rpc_rqst *req = task->tk_rqstp; 200 int priority; 201 202 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 203 if (task == xprt->snd_task) 204 return 1; 205 goto out_sleep; 206 } 207 xprt->snd_task = task; 208 if (req != NULL) { 209 req->rq_bytes_sent = 0; 210 req->rq_ntrans++; 211 } 212 213 return 1; 214 215 out_sleep: 216 dprintk("RPC: %5u failed to lock transport %p\n", 217 task->tk_pid, xprt); 218 task->tk_timeout = 0; 219 task->tk_status = -EAGAIN; 220 if (req == NULL) 221 priority = RPC_PRIORITY_LOW; 222 else if (!req->rq_ntrans) 223 priority = RPC_PRIORITY_NORMAL; 224 else 225 priority = RPC_PRIORITY_HIGH; 226 rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); 227 return 0; 228 } 229 EXPORT_SYMBOL_GPL(xprt_reserve_xprt); 230 231 static void xprt_clear_locked(struct rpc_xprt *xprt) 232 { 233 xprt->snd_task = NULL; 234 if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { 235 smp_mb__before_clear_bit(); 236 clear_bit(XPRT_LOCKED, &xprt->state); 237 smp_mb__after_clear_bit(); 238 } else 239 queue_work(rpciod_workqueue, &xprt->task_cleanup); 240 } 241 242 /* 243 * xprt_reserve_xprt_cong - serialize write access to transports 244 * @task: task that is requesting access to the transport 245 * 246 * Same as xprt_reserve_xprt, but Van Jacobson congestion control is 247 * integrated into the decision of whether a request is allowed to be 248 * woken up and given access to the transport. 249 */ 250 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 251 { 252 struct rpc_rqst *req = task->tk_rqstp; 253 int priority; 254 255 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 256 if (task == xprt->snd_task) 257 return 1; 258 goto out_sleep; 259 } 260 if (req == NULL) { 261 xprt->snd_task = task; 262 return 1; 263 } 264 if (__xprt_get_cong(xprt, task)) { 265 xprt->snd_task = task; 266 req->rq_bytes_sent = 0; 267 req->rq_ntrans++; 268 return 1; 269 } 270 xprt_clear_locked(xprt); 271 out_sleep: 272 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 273 task->tk_timeout = 0; 274 task->tk_status = -EAGAIN; 275 if (req == NULL) 276 priority = RPC_PRIORITY_LOW; 277 else if (!req->rq_ntrans) 278 priority = RPC_PRIORITY_NORMAL; 279 else 280 priority = RPC_PRIORITY_HIGH; 281 rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); 282 return 0; 283 } 284 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 285 286 static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 287 { 288 int retval; 289 290 spin_lock_bh(&xprt->transport_lock); 291 retval = xprt->ops->reserve_xprt(xprt, task); 292 spin_unlock_bh(&xprt->transport_lock); 293 return retval; 294 } 295 296 static bool __xprt_lock_write_func(struct rpc_task *task, void *data) 297 { 298 struct rpc_xprt *xprt = data; 299 struct rpc_rqst *req; 300 301 req = task->tk_rqstp; 302 xprt->snd_task = task; 303 if (req) { 304 req->rq_bytes_sent = 0; 305 req->rq_ntrans++; 306 } 307 return true; 308 } 309 310 static void __xprt_lock_write_next(struct rpc_xprt *xprt) 311 { 312 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 313 return; 314 315 if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) 316 return; 317 xprt_clear_locked(xprt); 318 } 319 320 static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) 321 { 322 struct rpc_xprt *xprt = data; 323 struct rpc_rqst *req; 324 325 req = task->tk_rqstp; 326 if (req == NULL) { 327 xprt->snd_task = task; 328 return true; 329 } 330 if (__xprt_get_cong(xprt, task)) { 331 xprt->snd_task = task; 332 req->rq_bytes_sent = 0; 333 req->rq_ntrans++; 334 return true; 335 } 336 return false; 337 } 338 339 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) 340 { 341 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 342 return; 343 if (RPCXPRT_CONGESTED(xprt)) 344 goto out_unlock; 345 if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) 346 return; 347 out_unlock: 348 xprt_clear_locked(xprt); 349 } 350 351 /** 352 * xprt_release_xprt - allow other requests to use a transport 353 * @xprt: transport with other tasks potentially waiting 354 * @task: task that is releasing access to the transport 355 * 356 * Note that "task" can be NULL. No congestion control is provided. 357 */ 358 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 359 { 360 if (xprt->snd_task == task) { 361 xprt_clear_locked(xprt); 362 __xprt_lock_write_next(xprt); 363 } 364 } 365 EXPORT_SYMBOL_GPL(xprt_release_xprt); 366 367 /** 368 * xprt_release_xprt_cong - allow other requests to use a transport 369 * @xprt: transport with other tasks potentially waiting 370 * @task: task that is releasing access to the transport 371 * 372 * Note that "task" can be NULL. Another task is awoken to use the 373 * transport if the transport's congestion window allows it. 374 */ 375 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 376 { 377 if (xprt->snd_task == task) { 378 xprt_clear_locked(xprt); 379 __xprt_lock_write_next_cong(xprt); 380 } 381 } 382 EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); 383 384 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 385 { 386 spin_lock_bh(&xprt->transport_lock); 387 xprt->ops->release_xprt(xprt, task); 388 spin_unlock_bh(&xprt->transport_lock); 389 } 390 391 /* 392 * Van Jacobson congestion avoidance. Check if the congestion window 393 * overflowed. Put the task to sleep if this is the case. 394 */ 395 static int 396 __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) 397 { 398 struct rpc_rqst *req = task->tk_rqstp; 399 400 if (req->rq_cong) 401 return 1; 402 dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", 403 task->tk_pid, xprt->cong, xprt->cwnd); 404 if (RPCXPRT_CONGESTED(xprt)) 405 return 0; 406 req->rq_cong = 1; 407 xprt->cong += RPC_CWNDSCALE; 408 return 1; 409 } 410 411 /* 412 * Adjust the congestion window, and wake up the next task 413 * that has been sleeping due to congestion 414 */ 415 static void 416 __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) 417 { 418 if (!req->rq_cong) 419 return; 420 req->rq_cong = 0; 421 xprt->cong -= RPC_CWNDSCALE; 422 __xprt_lock_write_next_cong(xprt); 423 } 424 425 /** 426 * xprt_release_rqst_cong - housekeeping when request is complete 427 * @task: RPC request that recently completed 428 * 429 * Useful for transports that require congestion control. 430 */ 431 void xprt_release_rqst_cong(struct rpc_task *task) 432 { 433 struct rpc_rqst *req = task->tk_rqstp; 434 435 __xprt_put_cong(req->rq_xprt, req); 436 } 437 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); 438 439 /** 440 * xprt_adjust_cwnd - adjust transport congestion window 441 * @xprt: pointer to xprt 442 * @task: recently completed RPC request used to adjust window 443 * @result: result code of completed RPC request 444 * 445 * We use a time-smoothed congestion estimator to avoid heavy oscillation. 446 */ 447 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) 448 { 449 struct rpc_rqst *req = task->tk_rqstp; 450 unsigned long cwnd = xprt->cwnd; 451 452 if (result >= 0 && cwnd <= xprt->cong) { 453 /* The (cwnd >> 1) term makes sure 454 * the result gets rounded properly. */ 455 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; 456 if (cwnd > RPC_MAXCWND(xprt)) 457 cwnd = RPC_MAXCWND(xprt); 458 __xprt_lock_write_next_cong(xprt); 459 } else if (result == -ETIMEDOUT) { 460 cwnd >>= 1; 461 if (cwnd < RPC_CWNDSCALE) 462 cwnd = RPC_CWNDSCALE; 463 } 464 dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", 465 xprt->cong, xprt->cwnd, cwnd); 466 xprt->cwnd = cwnd; 467 __xprt_put_cong(xprt, req); 468 } 469 EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); 470 471 /** 472 * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue 473 * @xprt: transport with waiting tasks 474 * @status: result code to plant in each task before waking it 475 * 476 */ 477 void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) 478 { 479 if (status < 0) 480 rpc_wake_up_status(&xprt->pending, status); 481 else 482 rpc_wake_up(&xprt->pending); 483 } 484 EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); 485 486 /** 487 * xprt_wait_for_buffer_space - wait for transport output buffer to clear 488 * @task: task to be put to sleep 489 * @action: function pointer to be executed after wait 490 * 491 * Note that we only set the timer for the case of RPC_IS_SOFT(), since 492 * we don't in general want to force a socket disconnection due to 493 * an incomplete RPC call transmission. 494 */ 495 void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) 496 { 497 struct rpc_rqst *req = task->tk_rqstp; 498 struct rpc_xprt *xprt = req->rq_xprt; 499 500 task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0; 501 rpc_sleep_on(&xprt->pending, task, action); 502 } 503 EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); 504 505 /** 506 * xprt_write_space - wake the task waiting for transport output buffer space 507 * @xprt: transport with waiting tasks 508 * 509 * Can be called in a soft IRQ context, so xprt_write_space never sleeps. 510 */ 511 void xprt_write_space(struct rpc_xprt *xprt) 512 { 513 spin_lock_bh(&xprt->transport_lock); 514 if (xprt->snd_task) { 515 dprintk("RPC: write space: waking waiting task on " 516 "xprt %p\n", xprt); 517 rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); 518 } 519 spin_unlock_bh(&xprt->transport_lock); 520 } 521 EXPORT_SYMBOL_GPL(xprt_write_space); 522 523 /** 524 * xprt_set_retrans_timeout_def - set a request's retransmit timeout 525 * @task: task whose timeout is to be set 526 * 527 * Set a request's retransmit timeout based on the transport's 528 * default timeout parameters. Used by transports that don't adjust 529 * the retransmit timeout based on round-trip time estimation. 530 */ 531 void xprt_set_retrans_timeout_def(struct rpc_task *task) 532 { 533 task->tk_timeout = task->tk_rqstp->rq_timeout; 534 } 535 EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); 536 537 /** 538 * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout 539 * @task: task whose timeout is to be set 540 * 541 * Set a request's retransmit timeout using the RTT estimator. 542 */ 543 void xprt_set_retrans_timeout_rtt(struct rpc_task *task) 544 { 545 int timer = task->tk_msg.rpc_proc->p_timer; 546 struct rpc_clnt *clnt = task->tk_client; 547 struct rpc_rtt *rtt = clnt->cl_rtt; 548 struct rpc_rqst *req = task->tk_rqstp; 549 unsigned long max_timeout = clnt->cl_timeout->to_maxval; 550 551 task->tk_timeout = rpc_calc_rto(rtt, timer); 552 task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; 553 if (task->tk_timeout > max_timeout || task->tk_timeout == 0) 554 task->tk_timeout = max_timeout; 555 } 556 EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); 557 558 static void xprt_reset_majortimeo(struct rpc_rqst *req) 559 { 560 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 561 562 req->rq_majortimeo = req->rq_timeout; 563 if (to->to_exponential) 564 req->rq_majortimeo <<= to->to_retries; 565 else 566 req->rq_majortimeo += to->to_increment * to->to_retries; 567 if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) 568 req->rq_majortimeo = to->to_maxval; 569 req->rq_majortimeo += jiffies; 570 } 571 572 /** 573 * xprt_adjust_timeout - adjust timeout values for next retransmit 574 * @req: RPC request containing parameters to use for the adjustment 575 * 576 */ 577 int xprt_adjust_timeout(struct rpc_rqst *req) 578 { 579 struct rpc_xprt *xprt = req->rq_xprt; 580 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 581 int status = 0; 582 583 if (time_before(jiffies, req->rq_majortimeo)) { 584 if (to->to_exponential) 585 req->rq_timeout <<= 1; 586 else 587 req->rq_timeout += to->to_increment; 588 if (to->to_maxval && req->rq_timeout >= to->to_maxval) 589 req->rq_timeout = to->to_maxval; 590 req->rq_retries++; 591 } else { 592 req->rq_timeout = to->to_initval; 593 req->rq_retries = 0; 594 xprt_reset_majortimeo(req); 595 /* Reset the RTT counters == "slow start" */ 596 spin_lock_bh(&xprt->transport_lock); 597 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 598 spin_unlock_bh(&xprt->transport_lock); 599 status = -ETIMEDOUT; 600 } 601 602 if (req->rq_timeout == 0) { 603 printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); 604 req->rq_timeout = 5 * HZ; 605 } 606 return status; 607 } 608 609 static void xprt_autoclose(struct work_struct *work) 610 { 611 struct rpc_xprt *xprt = 612 container_of(work, struct rpc_xprt, task_cleanup); 613 614 xprt->ops->close(xprt); 615 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 616 xprt_release_write(xprt, NULL); 617 } 618 619 /** 620 * xprt_disconnect_done - mark a transport as disconnected 621 * @xprt: transport to flag for disconnect 622 * 623 */ 624 void xprt_disconnect_done(struct rpc_xprt *xprt) 625 { 626 dprintk("RPC: disconnected transport %p\n", xprt); 627 spin_lock_bh(&xprt->transport_lock); 628 xprt_clear_connected(xprt); 629 xprt_wake_pending_tasks(xprt, -EAGAIN); 630 spin_unlock_bh(&xprt->transport_lock); 631 } 632 EXPORT_SYMBOL_GPL(xprt_disconnect_done); 633 634 /** 635 * xprt_force_disconnect - force a transport to disconnect 636 * @xprt: transport to disconnect 637 * 638 */ 639 void xprt_force_disconnect(struct rpc_xprt *xprt) 640 { 641 /* Don't race with the test_bit() in xprt_clear_locked() */ 642 spin_lock_bh(&xprt->transport_lock); 643 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 644 /* Try to schedule an autoclose RPC call */ 645 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 646 queue_work(rpciod_workqueue, &xprt->task_cleanup); 647 xprt_wake_pending_tasks(xprt, -EAGAIN); 648 spin_unlock_bh(&xprt->transport_lock); 649 } 650 651 /** 652 * xprt_conditional_disconnect - force a transport to disconnect 653 * @xprt: transport to disconnect 654 * @cookie: 'connection cookie' 655 * 656 * This attempts to break the connection if and only if 'cookie' matches 657 * the current transport 'connection cookie'. It ensures that we don't 658 * try to break the connection more than once when we need to retransmit 659 * a batch of RPC requests. 660 * 661 */ 662 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) 663 { 664 /* Don't race with the test_bit() in xprt_clear_locked() */ 665 spin_lock_bh(&xprt->transport_lock); 666 if (cookie != xprt->connect_cookie) 667 goto out; 668 if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) 669 goto out; 670 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 671 /* Try to schedule an autoclose RPC call */ 672 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 673 queue_work(rpciod_workqueue, &xprt->task_cleanup); 674 xprt_wake_pending_tasks(xprt, -EAGAIN); 675 out: 676 spin_unlock_bh(&xprt->transport_lock); 677 } 678 679 static void 680 xprt_init_autodisconnect(unsigned long data) 681 { 682 struct rpc_xprt *xprt = (struct rpc_xprt *)data; 683 684 spin_lock(&xprt->transport_lock); 685 if (!list_empty(&xprt->recv)) 686 goto out_abort; 687 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 688 goto out_abort; 689 spin_unlock(&xprt->transport_lock); 690 set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); 691 queue_work(rpciod_workqueue, &xprt->task_cleanup); 692 return; 693 out_abort: 694 spin_unlock(&xprt->transport_lock); 695 } 696 697 /** 698 * xprt_connect - schedule a transport connect operation 699 * @task: RPC task that is requesting the connect 700 * 701 */ 702 void xprt_connect(struct rpc_task *task) 703 { 704 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; 705 706 dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, 707 xprt, (xprt_connected(xprt) ? "is" : "is not")); 708 709 if (!xprt_bound(xprt)) { 710 task->tk_status = -EAGAIN; 711 return; 712 } 713 if (!xprt_lock_write(xprt, task)) 714 return; 715 716 if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) 717 xprt->ops->close(xprt); 718 719 if (xprt_connected(xprt)) 720 xprt_release_write(xprt, task); 721 else { 722 task->tk_rqstp->rq_bytes_sent = 0; 723 task->tk_timeout = task->tk_rqstp->rq_timeout; 724 rpc_sleep_on(&xprt->pending, task, xprt_connect_status); 725 726 if (test_bit(XPRT_CLOSING, &xprt->state)) 727 return; 728 if (xprt_test_and_set_connecting(xprt)) 729 return; 730 xprt->stat.connect_start = jiffies; 731 xprt->ops->connect(xprt, task); 732 } 733 } 734 735 static void xprt_connect_status(struct rpc_task *task) 736 { 737 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; 738 739 if (task->tk_status == 0) { 740 xprt->stat.connect_count++; 741 xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; 742 dprintk("RPC: %5u xprt_connect_status: connection established\n", 743 task->tk_pid); 744 return; 745 } 746 747 switch (task->tk_status) { 748 case -EAGAIN: 749 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); 750 break; 751 case -ETIMEDOUT: 752 dprintk("RPC: %5u xprt_connect_status: connect attempt timed " 753 "out\n", task->tk_pid); 754 break; 755 default: 756 dprintk("RPC: %5u xprt_connect_status: error %d connecting to " 757 "server %s\n", task->tk_pid, -task->tk_status, 758 xprt->servername); 759 xprt_release_write(xprt, task); 760 task->tk_status = -EIO; 761 } 762 } 763 764 /** 765 * xprt_lookup_rqst - find an RPC request corresponding to an XID 766 * @xprt: transport on which the original request was transmitted 767 * @xid: RPC XID of incoming reply 768 * 769 */ 770 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 771 { 772 struct rpc_rqst *entry; 773 774 list_for_each_entry(entry, &xprt->recv, rq_list) 775 if (entry->rq_xid == xid) 776 return entry; 777 778 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 779 ntohl(xid)); 780 xprt->stat.bad_xids++; 781 return NULL; 782 } 783 EXPORT_SYMBOL_GPL(xprt_lookup_rqst); 784 785 static void xprt_update_rtt(struct rpc_task *task) 786 { 787 struct rpc_rqst *req = task->tk_rqstp; 788 struct rpc_rtt *rtt = task->tk_client->cl_rtt; 789 unsigned int timer = task->tk_msg.rpc_proc->p_timer; 790 long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt)); 791 792 if (timer) { 793 if (req->rq_ntrans == 1) 794 rpc_update_rtt(rtt, timer, m); 795 rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); 796 } 797 } 798 799 /** 800 * xprt_complete_rqst - called when reply processing is complete 801 * @task: RPC request that recently completed 802 * @copied: actual number of bytes received from the transport 803 * 804 * Caller holds transport lock. 805 */ 806 void xprt_complete_rqst(struct rpc_task *task, int copied) 807 { 808 struct rpc_rqst *req = task->tk_rqstp; 809 struct rpc_xprt *xprt = req->rq_xprt; 810 811 dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", 812 task->tk_pid, ntohl(req->rq_xid), copied); 813 814 xprt->stat.recvs++; 815 req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); 816 if (xprt->ops->timer != NULL) 817 xprt_update_rtt(task); 818 819 list_del_init(&req->rq_list); 820 req->rq_private_buf.len = copied; 821 /* Ensure all writes are done before we update */ 822 /* req->rq_reply_bytes_recvd */ 823 smp_wmb(); 824 req->rq_reply_bytes_recvd = copied; 825 rpc_wake_up_queued_task(&xprt->pending, task); 826 } 827 EXPORT_SYMBOL_GPL(xprt_complete_rqst); 828 829 static void xprt_timer(struct rpc_task *task) 830 { 831 struct rpc_rqst *req = task->tk_rqstp; 832 struct rpc_xprt *xprt = req->rq_xprt; 833 834 if (task->tk_status != -ETIMEDOUT) 835 return; 836 dprintk("RPC: %5u xprt_timer\n", task->tk_pid); 837 838 spin_lock_bh(&xprt->transport_lock); 839 if (!req->rq_reply_bytes_recvd) { 840 if (xprt->ops->timer) 841 xprt->ops->timer(xprt, task); 842 } else 843 task->tk_status = 0; 844 spin_unlock_bh(&xprt->transport_lock); 845 } 846 847 static inline int xprt_has_timer(struct rpc_xprt *xprt) 848 { 849 return xprt->idle_timeout != 0; 850 } 851 852 /** 853 * xprt_prepare_transmit - reserve the transport before sending a request 854 * @task: RPC task about to send a request 855 * 856 */ 857 int xprt_prepare_transmit(struct rpc_task *task) 858 { 859 struct rpc_rqst *req = task->tk_rqstp; 860 struct rpc_xprt *xprt = req->rq_xprt; 861 int err = 0; 862 863 dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); 864 865 spin_lock_bh(&xprt->transport_lock); 866 if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { 867 err = req->rq_reply_bytes_recvd; 868 goto out_unlock; 869 } 870 if (!xprt->ops->reserve_xprt(xprt, task)) 871 err = -EAGAIN; 872 out_unlock: 873 spin_unlock_bh(&xprt->transport_lock); 874 return err; 875 } 876 877 void xprt_end_transmit(struct rpc_task *task) 878 { 879 xprt_release_write(task->tk_rqstp->rq_xprt, task); 880 } 881 882 /** 883 * xprt_transmit - send an RPC request on a transport 884 * @task: controlling RPC task 885 * 886 * We have to copy the iovec because sendmsg fiddles with its contents. 887 */ 888 void xprt_transmit(struct rpc_task *task) 889 { 890 struct rpc_rqst *req = task->tk_rqstp; 891 struct rpc_xprt *xprt = req->rq_xprt; 892 int status, numreqs; 893 894 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 895 896 if (!req->rq_reply_bytes_recvd) { 897 if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { 898 /* 899 * Add to the list only if we're expecting a reply 900 */ 901 spin_lock_bh(&xprt->transport_lock); 902 /* Update the softirq receive buffer */ 903 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 904 sizeof(req->rq_private_buf)); 905 /* Add request to the receive list */ 906 list_add_tail(&req->rq_list, &xprt->recv); 907 spin_unlock_bh(&xprt->transport_lock); 908 xprt_reset_majortimeo(req); 909 /* Turn off autodisconnect */ 910 del_singleshot_timer_sync(&xprt->timer); 911 } 912 } else if (!req->rq_bytes_sent) 913 return; 914 915 req->rq_connect_cookie = xprt->connect_cookie; 916 req->rq_xtime = ktime_get(); 917 status = xprt->ops->send_request(task); 918 if (status != 0) { 919 task->tk_status = status; 920 return; 921 } 922 923 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 924 task->tk_flags |= RPC_TASK_SENT; 925 spin_lock_bh(&xprt->transport_lock); 926 927 xprt->ops->set_retrans_timeout(task); 928 929 numreqs = atomic_read(&xprt->num_reqs); 930 if (numreqs > xprt->stat.max_slots) 931 xprt->stat.max_slots = numreqs; 932 xprt->stat.sends++; 933 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; 934 xprt->stat.bklog_u += xprt->backlog.qlen; 935 xprt->stat.sending_u += xprt->sending.qlen; 936 xprt->stat.pending_u += xprt->pending.qlen; 937 938 /* Don't race with disconnect */ 939 if (!xprt_connected(xprt)) 940 task->tk_status = -ENOTCONN; 941 else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { 942 /* 943 * Sleep on the pending queue since 944 * we're expecting a reply. 945 */ 946 rpc_sleep_on(&xprt->pending, task, xprt_timer); 947 } 948 spin_unlock_bh(&xprt->transport_lock); 949 } 950 951 static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) 952 { 953 struct rpc_rqst *req = ERR_PTR(-EAGAIN); 954 955 if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) 956 goto out; 957 req = kzalloc(sizeof(struct rpc_rqst), gfp_flags); 958 if (req != NULL) 959 goto out; 960 atomic_dec(&xprt->num_reqs); 961 req = ERR_PTR(-ENOMEM); 962 out: 963 return req; 964 } 965 966 static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 967 { 968 if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { 969 kfree(req); 970 return true; 971 } 972 return false; 973 } 974 975 void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) 976 { 977 struct rpc_rqst *req; 978 979 spin_lock(&xprt->reserve_lock); 980 if (!list_empty(&xprt->free)) { 981 req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 982 list_del(&req->rq_list); 983 goto out_init_req; 984 } 985 req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN); 986 if (!IS_ERR(req)) 987 goto out_init_req; 988 switch (PTR_ERR(req)) { 989 case -ENOMEM: 990 dprintk("RPC: dynamic allocation of request slot " 991 "failed! Retrying\n"); 992 task->tk_status = -ENOMEM; 993 break; 994 case -EAGAIN: 995 rpc_sleep_on(&xprt->backlog, task, NULL); 996 dprintk("RPC: waiting for request slot\n"); 997 default: 998 task->tk_status = -EAGAIN; 999 } 1000 spin_unlock(&xprt->reserve_lock); 1001 return; 1002 out_init_req: 1003 task->tk_status = 0; 1004 task->tk_rqstp = req; 1005 xprt_request_init(task, xprt); 1006 spin_unlock(&xprt->reserve_lock); 1007 } 1008 EXPORT_SYMBOL_GPL(xprt_alloc_slot); 1009 1010 void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) 1011 { 1012 /* Note: grabbing the xprt_lock_write() ensures that we throttle 1013 * new slot allocation if the transport is congested (i.e. when 1014 * reconnecting a stream transport or when out of socket write 1015 * buffer space). 1016 */ 1017 if (xprt_lock_write(xprt, task)) { 1018 xprt_alloc_slot(xprt, task); 1019 xprt_release_write(xprt, task); 1020 } 1021 } 1022 EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); 1023 1024 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 1025 { 1026 spin_lock(&xprt->reserve_lock); 1027 if (!xprt_dynamic_free_slot(xprt, req)) { 1028 memset(req, 0, sizeof(*req)); /* mark unused */ 1029 list_add(&req->rq_list, &xprt->free); 1030 } 1031 rpc_wake_up_next(&xprt->backlog); 1032 spin_unlock(&xprt->reserve_lock); 1033 } 1034 1035 static void xprt_free_all_slots(struct rpc_xprt *xprt) 1036 { 1037 struct rpc_rqst *req; 1038 while (!list_empty(&xprt->free)) { 1039 req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); 1040 list_del(&req->rq_list); 1041 kfree(req); 1042 } 1043 } 1044 1045 struct rpc_xprt *xprt_alloc(struct net *net, size_t size, 1046 unsigned int num_prealloc, 1047 unsigned int max_alloc) 1048 { 1049 struct rpc_xprt *xprt; 1050 struct rpc_rqst *req; 1051 int i; 1052 1053 xprt = kzalloc(size, GFP_KERNEL); 1054 if (xprt == NULL) 1055 goto out; 1056 1057 xprt_init(xprt, net); 1058 1059 for (i = 0; i < num_prealloc; i++) { 1060 req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); 1061 if (!req) 1062 break; 1063 list_add(&req->rq_list, &xprt->free); 1064 } 1065 if (i < num_prealloc) 1066 goto out_free; 1067 if (max_alloc > num_prealloc) 1068 xprt->max_reqs = max_alloc; 1069 else 1070 xprt->max_reqs = num_prealloc; 1071 xprt->min_reqs = num_prealloc; 1072 atomic_set(&xprt->num_reqs, num_prealloc); 1073 1074 return xprt; 1075 1076 out_free: 1077 xprt_free(xprt); 1078 out: 1079 return NULL; 1080 } 1081 EXPORT_SYMBOL_GPL(xprt_alloc); 1082 1083 void xprt_free(struct rpc_xprt *xprt) 1084 { 1085 put_net(xprt->xprt_net); 1086 xprt_free_all_slots(xprt); 1087 kfree(xprt); 1088 } 1089 EXPORT_SYMBOL_GPL(xprt_free); 1090 1091 /** 1092 * xprt_reserve - allocate an RPC request slot 1093 * @task: RPC task requesting a slot allocation 1094 * 1095 * If no more slots are available, place the task on the transport's 1096 * backlog queue. 1097 */ 1098 void xprt_reserve(struct rpc_task *task) 1099 { 1100 struct rpc_xprt *xprt; 1101 1102 task->tk_status = 0; 1103 if (task->tk_rqstp != NULL) 1104 return; 1105 1106 task->tk_timeout = 0; 1107 task->tk_status = -EAGAIN; 1108 rcu_read_lock(); 1109 xprt = rcu_dereference(task->tk_client->cl_xprt); 1110 xprt->ops->alloc_slot(xprt, task); 1111 rcu_read_unlock(); 1112 } 1113 1114 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) 1115 { 1116 return (__force __be32)xprt->xid++; 1117 } 1118 1119 static inline void xprt_init_xid(struct rpc_xprt *xprt) 1120 { 1121 xprt->xid = net_random(); 1122 } 1123 1124 static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) 1125 { 1126 struct rpc_rqst *req = task->tk_rqstp; 1127 1128 INIT_LIST_HEAD(&req->rq_list); 1129 req->rq_timeout = task->tk_client->cl_timeout->to_initval; 1130 req->rq_task = task; 1131 req->rq_xprt = xprt; 1132 req->rq_buffer = NULL; 1133 req->rq_xid = xprt_alloc_xid(xprt); 1134 req->rq_release_snd_buf = NULL; 1135 xprt_reset_majortimeo(req); 1136 dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, 1137 req, ntohl(req->rq_xid)); 1138 } 1139 1140 /** 1141 * xprt_release - release an RPC request slot 1142 * @task: task which is finished with the slot 1143 * 1144 */ 1145 void xprt_release(struct rpc_task *task) 1146 { 1147 struct rpc_xprt *xprt; 1148 struct rpc_rqst *req = task->tk_rqstp; 1149 1150 if (req == NULL) { 1151 if (task->tk_client) { 1152 rcu_read_lock(); 1153 xprt = rcu_dereference(task->tk_client->cl_xprt); 1154 if (xprt->snd_task == task) 1155 xprt_release_write(xprt, task); 1156 rcu_read_unlock(); 1157 } 1158 return; 1159 } 1160 1161 xprt = req->rq_xprt; 1162 if (task->tk_ops->rpc_count_stats != NULL) 1163 task->tk_ops->rpc_count_stats(task, task->tk_calldata); 1164 else if (task->tk_client) 1165 rpc_count_iostats(task, task->tk_client->cl_metrics); 1166 spin_lock_bh(&xprt->transport_lock); 1167 xprt->ops->release_xprt(xprt, task); 1168 if (xprt->ops->release_request) 1169 xprt->ops->release_request(task); 1170 if (!list_empty(&req->rq_list)) 1171 list_del(&req->rq_list); 1172 xprt->last_used = jiffies; 1173 if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) 1174 mod_timer(&xprt->timer, 1175 xprt->last_used + xprt->idle_timeout); 1176 spin_unlock_bh(&xprt->transport_lock); 1177 if (req->rq_buffer) 1178 xprt->ops->buf_free(req->rq_buffer); 1179 if (req->rq_cred != NULL) 1180 put_rpccred(req->rq_cred); 1181 task->tk_rqstp = NULL; 1182 if (req->rq_release_snd_buf) 1183 req->rq_release_snd_buf(req); 1184 1185 dprintk("RPC: %5u release request %p\n", task->tk_pid, req); 1186 if (likely(!bc_prealloc(req))) 1187 xprt_free_slot(xprt, req); 1188 else 1189 xprt_free_bc_request(req); 1190 } 1191 1192 static void xprt_init(struct rpc_xprt *xprt, struct net *net) 1193 { 1194 atomic_set(&xprt->count, 1); 1195 1196 spin_lock_init(&xprt->transport_lock); 1197 spin_lock_init(&xprt->reserve_lock); 1198 1199 INIT_LIST_HEAD(&xprt->free); 1200 INIT_LIST_HEAD(&xprt->recv); 1201 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1202 spin_lock_init(&xprt->bc_pa_lock); 1203 INIT_LIST_HEAD(&xprt->bc_pa_list); 1204 #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1205 1206 xprt->last_used = jiffies; 1207 xprt->cwnd = RPC_INITCWND; 1208 xprt->bind_index = 0; 1209 1210 rpc_init_wait_queue(&xprt->binding, "xprt_binding"); 1211 rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 1212 rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending"); 1213 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 1214 1215 xprt_init_xid(xprt); 1216 1217 xprt->xprt_net = get_net(net); 1218 } 1219 1220 /** 1221 * xprt_create_transport - create an RPC transport 1222 * @args: rpc transport creation arguments 1223 * 1224 */ 1225 struct rpc_xprt *xprt_create_transport(struct xprt_create *args) 1226 { 1227 struct rpc_xprt *xprt; 1228 struct xprt_class *t; 1229 1230 spin_lock(&xprt_list_lock); 1231 list_for_each_entry(t, &xprt_list, list) { 1232 if (t->ident == args->ident) { 1233 spin_unlock(&xprt_list_lock); 1234 goto found; 1235 } 1236 } 1237 spin_unlock(&xprt_list_lock); 1238 printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); 1239 return ERR_PTR(-EIO); 1240 1241 found: 1242 xprt = t->setup(args); 1243 if (IS_ERR(xprt)) { 1244 dprintk("RPC: xprt_create_transport: failed, %ld\n", 1245 -PTR_ERR(xprt)); 1246 goto out; 1247 } 1248 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1249 if (xprt_has_timer(xprt)) 1250 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1251 (unsigned long)xprt); 1252 else 1253 init_timer(&xprt->timer); 1254 1255 if (strlen(args->servername) > RPC_MAXNETNAMELEN) { 1256 xprt_destroy(xprt); 1257 return ERR_PTR(-EINVAL); 1258 } 1259 xprt->servername = kstrdup(args->servername, GFP_KERNEL); 1260 if (xprt->servername == NULL) { 1261 xprt_destroy(xprt); 1262 return ERR_PTR(-ENOMEM); 1263 } 1264 1265 dprintk("RPC: created transport %p with %u slots\n", xprt, 1266 xprt->max_reqs); 1267 out: 1268 return xprt; 1269 } 1270 1271 /** 1272 * xprt_destroy - destroy an RPC transport, killing off all requests. 1273 * @xprt: transport to destroy 1274 * 1275 */ 1276 static void xprt_destroy(struct rpc_xprt *xprt) 1277 { 1278 dprintk("RPC: destroying transport %p\n", xprt); 1279 del_timer_sync(&xprt->timer); 1280 1281 rpc_destroy_wait_queue(&xprt->binding); 1282 rpc_destroy_wait_queue(&xprt->pending); 1283 rpc_destroy_wait_queue(&xprt->sending); 1284 rpc_destroy_wait_queue(&xprt->backlog); 1285 cancel_work_sync(&xprt->task_cleanup); 1286 kfree(xprt->servername); 1287 /* 1288 * Tear down transport state and free the rpc_xprt 1289 */ 1290 xprt->ops->destroy(xprt); 1291 } 1292 1293 /** 1294 * xprt_put - release a reference to an RPC transport. 1295 * @xprt: pointer to the transport 1296 * 1297 */ 1298 void xprt_put(struct rpc_xprt *xprt) 1299 { 1300 if (atomic_dec_and_test(&xprt->count)) 1301 xprt_destroy(xprt); 1302 } 1303 1304 /** 1305 * xprt_get - return a reference to an RPC transport. 1306 * @xprt: pointer to the transport 1307 * 1308 */ 1309 struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) 1310 { 1311 if (atomic_inc_not_zero(&xprt->count)) 1312 return xprt; 1313 return NULL; 1314 } 1315