1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/net/sunrpc/xprt.c 4 * 5 * This is a generic RPC call interface supporting congestion avoidance, 6 * and asynchronous calls. 7 * 8 * The interface works like this: 9 * 10 * - When a process places a call, it allocates a request slot if 11 * one is available. Otherwise, it sleeps on the backlog queue 12 * (xprt_reserve). 13 * - Next, the caller puts together the RPC message, stuffs it into 14 * the request struct, and calls xprt_transmit(). 15 * - xprt_transmit sends the message and installs the caller on the 16 * transport's wait list. At the same time, if a reply is expected, 17 * it installs a timer that is run after the packet's timeout has 18 * expired. 19 * - When a packet arrives, the data_ready handler walks the list of 20 * pending requests for that transport. If a matching XID is found, the 21 * caller is woken up, and the timer removed. 22 * - When no reply arrives within the timeout interval, the timer is 23 * fired by the kernel and runs xprt_timer(). It either adjusts the 24 * timeout values (minor timeout) or wakes up the caller with a status 25 * of -ETIMEDOUT. 26 * - When the caller receives a notification from RPC that a reply arrived, 27 * it should release the RPC slot, and process the reply. 28 * If the call timed out, it may choose to retry the operation by 29 * adjusting the initial timeout value, and simply calling rpc_call 30 * again. 31 * 32 * Support for async RPC is done through a set of RPC-specific scheduling 33 * primitives that `transparently' work for processes as well as async 34 * tasks that rely on callbacks. 
35 * 36 * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> 37 * 38 * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> 39 */ 40 41 #include <linux/module.h> 42 43 #include <linux/types.h> 44 #include <linux/interrupt.h> 45 #include <linux/workqueue.h> 46 #include <linux/net.h> 47 #include <linux/ktime.h> 48 49 #include <linux/sunrpc/clnt.h> 50 #include <linux/sunrpc/metrics.h> 51 #include <linux/sunrpc/bc_xprt.h> 52 #include <linux/rcupdate.h> 53 #include <linux/sched/mm.h> 54 55 #include <trace/events/sunrpc.h> 56 57 #include "sunrpc.h" 58 59 /* 60 * Local variables 61 */ 62 63 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 64 # define RPCDBG_FACILITY RPCDBG_XPRT 65 #endif 66 67 /* 68 * Local functions 69 */ 70 static void xprt_init(struct rpc_xprt *xprt, struct net *net); 71 static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); 72 static void xprt_destroy(struct rpc_xprt *xprt); 73 74 static DEFINE_SPINLOCK(xprt_list_lock); 75 static LIST_HEAD(xprt_list); 76 77 static unsigned long xprt_request_timeout(const struct rpc_rqst *req) 78 { 79 unsigned long timeout = jiffies + req->rq_timeout; 80 81 if (time_before(timeout, req->rq_majortimeo)) 82 return timeout; 83 return req->rq_majortimeo; 84 } 85 86 /** 87 * xprt_register_transport - register a transport implementation 88 * @transport: transport to register 89 * 90 * If a transport implementation is loaded as a kernel module, it can 91 * call this interface to make itself known to the RPC client. 
92 * 93 * Returns: 94 * 0: transport successfully registered 95 * -EEXIST: transport already registered 96 * -EINVAL: transport module being unloaded 97 */ 98 int xprt_register_transport(struct xprt_class *transport) 99 { 100 struct xprt_class *t; 101 int result; 102 103 result = -EEXIST; 104 spin_lock(&xprt_list_lock); 105 list_for_each_entry(t, &xprt_list, list) { 106 /* don't register the same transport class twice */ 107 if (t->ident == transport->ident) 108 goto out; 109 } 110 111 list_add_tail(&transport->list, &xprt_list); 112 printk(KERN_INFO "RPC: Registered %s transport module.\n", 113 transport->name); 114 result = 0; 115 116 out: 117 spin_unlock(&xprt_list_lock); 118 return result; 119 } 120 EXPORT_SYMBOL_GPL(xprt_register_transport); 121 122 /** 123 * xprt_unregister_transport - unregister a transport implementation 124 * @transport: transport to unregister 125 * 126 * Returns: 127 * 0: transport successfully unregistered 128 * -ENOENT: transport never registered 129 */ 130 int xprt_unregister_transport(struct xprt_class *transport) 131 { 132 struct xprt_class *t; 133 int result; 134 135 result = 0; 136 spin_lock(&xprt_list_lock); 137 list_for_each_entry(t, &xprt_list, list) { 138 if (t == transport) { 139 printk(KERN_INFO 140 "RPC: Unregistered %s transport module.\n", 141 transport->name); 142 list_del_init(&transport->list); 143 goto out; 144 } 145 } 146 result = -ENOENT; 147 148 out: 149 spin_unlock(&xprt_list_lock); 150 return result; 151 } 152 EXPORT_SYMBOL_GPL(xprt_unregister_transport); 153 154 /** 155 * xprt_load_transport - load a transport implementation 156 * @transport_name: transport to load 157 * 158 * Returns: 159 * 0: transport successfully loaded 160 * -ENOENT: transport module not available 161 */ 162 int xprt_load_transport(const char *transport_name) 163 { 164 struct xprt_class *t; 165 int result; 166 167 result = 0; 168 spin_lock(&xprt_list_lock); 169 list_for_each_entry(t, &xprt_list, list) { 170 if (strcmp(t->name, 
transport_name) == 0) { 171 spin_unlock(&xprt_list_lock); 172 goto out; 173 } 174 } 175 spin_unlock(&xprt_list_lock); 176 result = request_module("xprt%s", transport_name); 177 out: 178 return result; 179 } 180 EXPORT_SYMBOL_GPL(xprt_load_transport); 181 182 static void xprt_clear_locked(struct rpc_xprt *xprt) 183 { 184 xprt->snd_task = NULL; 185 if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { 186 smp_mb__before_atomic(); 187 clear_bit(XPRT_LOCKED, &xprt->state); 188 smp_mb__after_atomic(); 189 } else 190 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 191 } 192 193 /** 194 * xprt_reserve_xprt - serialize write access to transports 195 * @task: task that is requesting access to the transport 196 * @xprt: pointer to the target transport 197 * 198 * This prevents mixing the payload of separate requests, and prevents 199 * transport connects from colliding with writes. No congestion control 200 * is provided. 201 */ 202 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 203 { 204 struct rpc_rqst *req = task->tk_rqstp; 205 206 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 207 if (task == xprt->snd_task) 208 return 1; 209 goto out_sleep; 210 } 211 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 212 goto out_unlock; 213 xprt->snd_task = task; 214 215 return 1; 216 217 out_unlock: 218 xprt_clear_locked(xprt); 219 out_sleep: 220 dprintk("RPC: %5u failed to lock transport %p\n", 221 task->tk_pid, xprt); 222 task->tk_status = -EAGAIN; 223 if (RPC_IS_SOFT(task)) 224 rpc_sleep_on_timeout(&xprt->sending, task, NULL, 225 xprt_request_timeout(req)); 226 else 227 rpc_sleep_on(&xprt->sending, task, NULL); 228 return 0; 229 } 230 EXPORT_SYMBOL_GPL(xprt_reserve_xprt); 231 232 static bool 233 xprt_need_congestion_window_wait(struct rpc_xprt *xprt) 234 { 235 return test_bit(XPRT_CWND_WAIT, &xprt->state); 236 } 237 238 static void 239 xprt_set_congestion_window_wait(struct rpc_xprt *xprt) 240 { 241 if (!list_empty(&xprt->xmit_queue)) { 242 /* Peek at head of 
queue to see if it can make progress */ 243 if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst, 244 rq_xmit)->rq_cong) 245 return; 246 } 247 set_bit(XPRT_CWND_WAIT, &xprt->state); 248 } 249 250 static void 251 xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt) 252 { 253 if (!RPCXPRT_CONGESTED(xprt)) 254 clear_bit(XPRT_CWND_WAIT, &xprt->state); 255 } 256 257 /* 258 * xprt_reserve_xprt_cong - serialize write access to transports 259 * @task: task that is requesting access to the transport 260 * 261 * Same as xprt_reserve_xprt, but Van Jacobson congestion control is 262 * integrated into the decision of whether a request is allowed to be 263 * woken up and given access to the transport. 264 * Note that the lock is only granted if we know there are free slots. 265 */ 266 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 267 { 268 struct rpc_rqst *req = task->tk_rqstp; 269 270 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 271 if (task == xprt->snd_task) 272 return 1; 273 goto out_sleep; 274 } 275 if (req == NULL) { 276 xprt->snd_task = task; 277 return 1; 278 } 279 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 280 goto out_unlock; 281 if (!xprt_need_congestion_window_wait(xprt)) { 282 xprt->snd_task = task; 283 return 1; 284 } 285 out_unlock: 286 xprt_clear_locked(xprt); 287 out_sleep: 288 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 289 task->tk_status = -EAGAIN; 290 if (RPC_IS_SOFT(task)) 291 rpc_sleep_on_timeout(&xprt->sending, task, NULL, 292 xprt_request_timeout(req)); 293 else 294 rpc_sleep_on(&xprt->sending, task, NULL); 295 return 0; 296 } 297 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 298 299 static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) 300 { 301 int retval; 302 303 if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task) 304 return 1; 305 spin_lock(&xprt->transport_lock); 306 retval = xprt->ops->reserve_xprt(xprt, task); 307 
spin_unlock(&xprt->transport_lock); 308 return retval; 309 } 310 311 static bool __xprt_lock_write_func(struct rpc_task *task, void *data) 312 { 313 struct rpc_xprt *xprt = data; 314 315 xprt->snd_task = task; 316 return true; 317 } 318 319 static void __xprt_lock_write_next(struct rpc_xprt *xprt) 320 { 321 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 322 return; 323 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 324 goto out_unlock; 325 if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, 326 __xprt_lock_write_func, xprt)) 327 return; 328 out_unlock: 329 xprt_clear_locked(xprt); 330 } 331 332 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) 333 { 334 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 335 return; 336 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 337 goto out_unlock; 338 if (xprt_need_congestion_window_wait(xprt)) 339 goto out_unlock; 340 if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, 341 __xprt_lock_write_func, xprt)) 342 return; 343 out_unlock: 344 xprt_clear_locked(xprt); 345 } 346 347 /** 348 * xprt_release_xprt - allow other requests to use a transport 349 * @xprt: transport with other tasks potentially waiting 350 * @task: task that is releasing access to the transport 351 * 352 * Note that "task" can be NULL. No congestion control is provided. 353 */ 354 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 355 { 356 if (xprt->snd_task == task) { 357 xprt_clear_locked(xprt); 358 __xprt_lock_write_next(xprt); 359 } 360 } 361 EXPORT_SYMBOL_GPL(xprt_release_xprt); 362 363 /** 364 * xprt_release_xprt_cong - allow other requests to use a transport 365 * @xprt: transport with other tasks potentially waiting 366 * @task: task that is releasing access to the transport 367 * 368 * Note that "task" can be NULL. Another task is awoken to use the 369 * transport if the transport's congestion window allows it. 
370 */ 371 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 372 { 373 if (xprt->snd_task == task) { 374 xprt_clear_locked(xprt); 375 __xprt_lock_write_next_cong(xprt); 376 } 377 } 378 EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); 379 380 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) 381 { 382 if (xprt->snd_task != task) 383 return; 384 spin_lock(&xprt->transport_lock); 385 xprt->ops->release_xprt(xprt, task); 386 spin_unlock(&xprt->transport_lock); 387 } 388 389 /* 390 * Van Jacobson congestion avoidance. Check if the congestion window 391 * overflowed. Put the task to sleep if this is the case. 392 */ 393 static int 394 __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) 395 { 396 if (req->rq_cong) 397 return 1; 398 dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", 399 req->rq_task->tk_pid, xprt->cong, xprt->cwnd); 400 if (RPCXPRT_CONGESTED(xprt)) { 401 xprt_set_congestion_window_wait(xprt); 402 return 0; 403 } 404 req->rq_cong = 1; 405 xprt->cong += RPC_CWNDSCALE; 406 return 1; 407 } 408 409 /* 410 * Adjust the congestion window, and wake up the next task 411 * that has been sleeping due to congestion 412 */ 413 static void 414 __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) 415 { 416 if (!req->rq_cong) 417 return; 418 req->rq_cong = 0; 419 xprt->cong -= RPC_CWNDSCALE; 420 xprt_test_and_clear_congestion_window_wait(xprt); 421 __xprt_lock_write_next_cong(xprt); 422 } 423 424 /** 425 * xprt_request_get_cong - Request congestion control credits 426 * @xprt: pointer to transport 427 * @req: pointer to RPC request 428 * 429 * Useful for transports that require congestion control. 
430 */ 431 bool 432 xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) 433 { 434 bool ret = false; 435 436 if (req->rq_cong) 437 return true; 438 spin_lock(&xprt->transport_lock); 439 ret = __xprt_get_cong(xprt, req) != 0; 440 spin_unlock(&xprt->transport_lock); 441 return ret; 442 } 443 EXPORT_SYMBOL_GPL(xprt_request_get_cong); 444 445 /** 446 * xprt_release_rqst_cong - housekeeping when request is complete 447 * @task: RPC request that recently completed 448 * 449 * Useful for transports that require congestion control. 450 */ 451 void xprt_release_rqst_cong(struct rpc_task *task) 452 { 453 struct rpc_rqst *req = task->tk_rqstp; 454 455 __xprt_put_cong(req->rq_xprt, req); 456 } 457 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); 458 459 /* 460 * Clear the congestion window wait flag and wake up the next 461 * entry on xprt->sending 462 */ 463 static void 464 xprt_clear_congestion_window_wait(struct rpc_xprt *xprt) 465 { 466 if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) { 467 spin_lock(&xprt->transport_lock); 468 __xprt_lock_write_next_cong(xprt); 469 spin_unlock(&xprt->transport_lock); 470 } 471 } 472 473 /** 474 * xprt_adjust_cwnd - adjust transport congestion window 475 * @xprt: pointer to xprt 476 * @task: recently completed RPC request used to adjust window 477 * @result: result code of completed RPC request 478 * 479 * The transport code maintains an estimate on the maximum number of out- 480 * standing RPC requests, using a smoothed version of the congestion 481 * avoidance implemented in 44BSD. This is basically the Van Jacobson 482 * congestion algorithm: If a retransmit occurs, the congestion window is 483 * halved; otherwise, it is incremented by 1/cwnd when 484 * 485 * - a reply is received and 486 * - a full number of requests are outstanding and 487 * - the congestion window hasn't been updated recently. 
488 */ 489 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) 490 { 491 struct rpc_rqst *req = task->tk_rqstp; 492 unsigned long cwnd = xprt->cwnd; 493 494 if (result >= 0 && cwnd <= xprt->cong) { 495 /* The (cwnd >> 1) term makes sure 496 * the result gets rounded properly. */ 497 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; 498 if (cwnd > RPC_MAXCWND(xprt)) 499 cwnd = RPC_MAXCWND(xprt); 500 __xprt_lock_write_next_cong(xprt); 501 } else if (result == -ETIMEDOUT) { 502 cwnd >>= 1; 503 if (cwnd < RPC_CWNDSCALE) 504 cwnd = RPC_CWNDSCALE; 505 } 506 dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", 507 xprt->cong, xprt->cwnd, cwnd); 508 xprt->cwnd = cwnd; 509 __xprt_put_cong(xprt, req); 510 } 511 EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); 512 513 /** 514 * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue 515 * @xprt: transport with waiting tasks 516 * @status: result code to plant in each task before waking it 517 * 518 */ 519 void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) 520 { 521 if (status < 0) 522 rpc_wake_up_status(&xprt->pending, status); 523 else 524 rpc_wake_up(&xprt->pending); 525 } 526 EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); 527 528 /** 529 * xprt_wait_for_buffer_space - wait for transport output buffer to clear 530 * @xprt: transport 531 * 532 * Note that we only set the timer for the case of RPC_IS_SOFT(), since 533 * we don't in general want to force a socket disconnection due to 534 * an incomplete RPC call transmission. 
535 */ 536 void xprt_wait_for_buffer_space(struct rpc_xprt *xprt) 537 { 538 set_bit(XPRT_WRITE_SPACE, &xprt->state); 539 } 540 EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); 541 542 static bool 543 xprt_clear_write_space_locked(struct rpc_xprt *xprt) 544 { 545 if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) { 546 __xprt_lock_write_next(xprt); 547 dprintk("RPC: write space: waking waiting task on " 548 "xprt %p\n", xprt); 549 return true; 550 } 551 return false; 552 } 553 554 /** 555 * xprt_write_space - wake the task waiting for transport output buffer space 556 * @xprt: transport with waiting tasks 557 * 558 * Can be called in a soft IRQ context, so xprt_write_space never sleeps. 559 */ 560 bool xprt_write_space(struct rpc_xprt *xprt) 561 { 562 bool ret; 563 564 if (!test_bit(XPRT_WRITE_SPACE, &xprt->state)) 565 return false; 566 spin_lock(&xprt->transport_lock); 567 ret = xprt_clear_write_space_locked(xprt); 568 spin_unlock(&xprt->transport_lock); 569 return ret; 570 } 571 EXPORT_SYMBOL_GPL(xprt_write_space); 572 573 static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime) 574 { 575 s64 delta = ktime_to_ns(ktime_get() - abstime); 576 return likely(delta >= 0) ? 
577 jiffies - nsecs_to_jiffies(delta) : 578 jiffies + nsecs_to_jiffies(-delta); 579 } 580 581 static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) 582 { 583 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 584 unsigned long majortimeo = req->rq_timeout; 585 586 if (to->to_exponential) 587 majortimeo <<= to->to_retries; 588 else 589 majortimeo += to->to_increment * to->to_retries; 590 if (majortimeo > to->to_maxval || majortimeo == 0) 591 majortimeo = to->to_maxval; 592 return majortimeo; 593 } 594 595 static void xprt_reset_majortimeo(struct rpc_rqst *req) 596 { 597 req->rq_majortimeo += xprt_calc_majortimeo(req); 598 } 599 600 static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) 601 { 602 unsigned long time_init; 603 struct rpc_xprt *xprt = req->rq_xprt; 604 605 if (likely(xprt && xprt_connected(xprt))) 606 time_init = jiffies; 607 else 608 time_init = xprt_abs_ktime_to_jiffies(task->tk_start); 609 req->rq_timeout = task->tk_client->cl_timeout->to_initval; 610 req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); 611 } 612 613 /** 614 * xprt_adjust_timeout - adjust timeout values for next retransmit 615 * @req: RPC request containing parameters to use for the adjustment 616 * 617 */ 618 int xprt_adjust_timeout(struct rpc_rqst *req) 619 { 620 struct rpc_xprt *xprt = req->rq_xprt; 621 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 622 int status = 0; 623 624 if (time_before(jiffies, req->rq_majortimeo)) { 625 if (to->to_exponential) 626 req->rq_timeout <<= 1; 627 else 628 req->rq_timeout += to->to_increment; 629 if (to->to_maxval && req->rq_timeout >= to->to_maxval) 630 req->rq_timeout = to->to_maxval; 631 req->rq_retries++; 632 } else { 633 req->rq_timeout = to->to_initval; 634 req->rq_retries = 0; 635 xprt_reset_majortimeo(req); 636 /* Reset the RTT counters == "slow start" */ 637 spin_lock(&xprt->transport_lock); 638 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 639 
spin_unlock(&xprt->transport_lock); 640 status = -ETIMEDOUT; 641 } 642 643 if (req->rq_timeout == 0) { 644 printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); 645 req->rq_timeout = 5 * HZ; 646 } 647 return status; 648 } 649 650 static void xprt_autoclose(struct work_struct *work) 651 { 652 struct rpc_xprt *xprt = 653 container_of(work, struct rpc_xprt, task_cleanup); 654 unsigned int pflags = memalloc_nofs_save(); 655 656 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 657 xprt->ops->close(xprt); 658 xprt_release_write(xprt, NULL); 659 wake_up_bit(&xprt->state, XPRT_LOCKED); 660 memalloc_nofs_restore(pflags); 661 } 662 663 /** 664 * xprt_disconnect_done - mark a transport as disconnected 665 * @xprt: transport to flag for disconnect 666 * 667 */ 668 void xprt_disconnect_done(struct rpc_xprt *xprt) 669 { 670 dprintk("RPC: disconnected transport %p\n", xprt); 671 spin_lock(&xprt->transport_lock); 672 xprt_clear_connected(xprt); 673 xprt_clear_write_space_locked(xprt); 674 xprt_wake_pending_tasks(xprt, -ENOTCONN); 675 spin_unlock(&xprt->transport_lock); 676 } 677 EXPORT_SYMBOL_GPL(xprt_disconnect_done); 678 679 /** 680 * xprt_force_disconnect - force a transport to disconnect 681 * @xprt: transport to disconnect 682 * 683 */ 684 void xprt_force_disconnect(struct rpc_xprt *xprt) 685 { 686 /* Don't race with the test_bit() in xprt_clear_locked() */ 687 spin_lock(&xprt->transport_lock); 688 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 689 /* Try to schedule an autoclose RPC call */ 690 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 691 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 692 else if (xprt->snd_task) 693 rpc_wake_up_queued_task_set_status(&xprt->pending, 694 xprt->snd_task, -ENOTCONN); 695 spin_unlock(&xprt->transport_lock); 696 } 697 EXPORT_SYMBOL_GPL(xprt_force_disconnect); 698 699 static unsigned int 700 xprt_connect_cookie(struct rpc_xprt *xprt) 701 { 702 return READ_ONCE(xprt->connect_cookie); 703 } 704 705 static bool 706 
xprt_request_retransmit_after_disconnect(struct rpc_task *task) 707 { 708 struct rpc_rqst *req = task->tk_rqstp; 709 struct rpc_xprt *xprt = req->rq_xprt; 710 711 return req->rq_connect_cookie != xprt_connect_cookie(xprt) || 712 !xprt_connected(xprt); 713 } 714 715 /** 716 * xprt_conditional_disconnect - force a transport to disconnect 717 * @xprt: transport to disconnect 718 * @cookie: 'connection cookie' 719 * 720 * This attempts to break the connection if and only if 'cookie' matches 721 * the current transport 'connection cookie'. It ensures that we don't 722 * try to break the connection more than once when we need to retransmit 723 * a batch of RPC requests. 724 * 725 */ 726 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) 727 { 728 /* Don't race with the test_bit() in xprt_clear_locked() */ 729 spin_lock(&xprt->transport_lock); 730 if (cookie != xprt->connect_cookie) 731 goto out; 732 if (test_bit(XPRT_CLOSING, &xprt->state)) 733 goto out; 734 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 735 /* Try to schedule an autoclose RPC call */ 736 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 737 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 738 xprt_wake_pending_tasks(xprt, -EAGAIN); 739 out: 740 spin_unlock(&xprt->transport_lock); 741 } 742 743 static bool 744 xprt_has_timer(const struct rpc_xprt *xprt) 745 { 746 return xprt->idle_timeout != 0; 747 } 748 749 static void 750 xprt_schedule_autodisconnect(struct rpc_xprt *xprt) 751 __must_hold(&xprt->transport_lock) 752 { 753 xprt->last_used = jiffies; 754 if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt)) 755 mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); 756 } 757 758 static void 759 xprt_init_autodisconnect(struct timer_list *t) 760 { 761 struct rpc_xprt *xprt = from_timer(xprt, t, timer); 762 763 if (!RB_EMPTY_ROOT(&xprt->recv_queue)) 764 return; 765 /* Reset xprt->last_used to avoid connect/autodisconnect cycling */ 766 xprt->last_used = jiffies; 767 
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 768 return; 769 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 770 } 771 772 bool xprt_lock_connect(struct rpc_xprt *xprt, 773 struct rpc_task *task, 774 void *cookie) 775 { 776 bool ret = false; 777 778 spin_lock(&xprt->transport_lock); 779 if (!test_bit(XPRT_LOCKED, &xprt->state)) 780 goto out; 781 if (xprt->snd_task != task) 782 goto out; 783 xprt->snd_task = cookie; 784 ret = true; 785 out: 786 spin_unlock(&xprt->transport_lock); 787 return ret; 788 } 789 790 void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) 791 { 792 spin_lock(&xprt->transport_lock); 793 if (xprt->snd_task != cookie) 794 goto out; 795 if (!test_bit(XPRT_LOCKED, &xprt->state)) 796 goto out; 797 xprt->snd_task =NULL; 798 xprt->ops->release_xprt(xprt, NULL); 799 xprt_schedule_autodisconnect(xprt); 800 out: 801 spin_unlock(&xprt->transport_lock); 802 wake_up_bit(&xprt->state, XPRT_LOCKED); 803 } 804 805 /** 806 * xprt_connect - schedule a transport connect operation 807 * @task: RPC task that is requesting the connect 808 * 809 */ 810 void xprt_connect(struct rpc_task *task) 811 { 812 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; 813 814 dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, 815 xprt, (xprt_connected(xprt) ? 
"is" : "is not")); 816 817 if (!xprt_bound(xprt)) { 818 task->tk_status = -EAGAIN; 819 return; 820 } 821 if (!xprt_lock_write(xprt, task)) 822 return; 823 824 if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) 825 xprt->ops->close(xprt); 826 827 if (!xprt_connected(xprt)) { 828 task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; 829 rpc_sleep_on_timeout(&xprt->pending, task, NULL, 830 xprt_request_timeout(task->tk_rqstp)); 831 832 if (test_bit(XPRT_CLOSING, &xprt->state)) 833 return; 834 if (xprt_test_and_set_connecting(xprt)) 835 return; 836 /* Race breaker */ 837 if (!xprt_connected(xprt)) { 838 xprt->stat.connect_start = jiffies; 839 xprt->ops->connect(xprt, task); 840 } else { 841 xprt_clear_connecting(xprt); 842 task->tk_status = 0; 843 rpc_wake_up_queued_task(&xprt->pending, task); 844 } 845 } 846 xprt_release_write(xprt, task); 847 } 848 849 enum xprt_xid_rb_cmp { 850 XID_RB_EQUAL, 851 XID_RB_LEFT, 852 XID_RB_RIGHT, 853 }; 854 static enum xprt_xid_rb_cmp 855 xprt_xid_cmp(__be32 xid1, __be32 xid2) 856 { 857 if (xid1 == xid2) 858 return XID_RB_EQUAL; 859 if ((__force u32)xid1 < (__force u32)xid2) 860 return XID_RB_LEFT; 861 return XID_RB_RIGHT; 862 } 863 864 static struct rpc_rqst * 865 xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid) 866 { 867 struct rb_node *n = xprt->recv_queue.rb_node; 868 struct rpc_rqst *req; 869 870 while (n != NULL) { 871 req = rb_entry(n, struct rpc_rqst, rq_recv); 872 switch (xprt_xid_cmp(xid, req->rq_xid)) { 873 case XID_RB_LEFT: 874 n = n->rb_left; 875 break; 876 case XID_RB_RIGHT: 877 n = n->rb_right; 878 break; 879 case XID_RB_EQUAL: 880 return req; 881 } 882 } 883 return NULL; 884 } 885 886 static void 887 xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new) 888 { 889 struct rb_node **p = &xprt->recv_queue.rb_node; 890 struct rb_node *n = NULL; 891 struct rpc_rqst *req; 892 893 while (*p != NULL) { 894 n = *p; 895 req = rb_entry(n, struct rpc_rqst, rq_recv); 896 switch(xprt_xid_cmp(new->rq_xid, 
req->rq_xid)) { 897 case XID_RB_LEFT: 898 p = &n->rb_left; 899 break; 900 case XID_RB_RIGHT: 901 p = &n->rb_right; 902 break; 903 case XID_RB_EQUAL: 904 WARN_ON_ONCE(new != req); 905 return; 906 } 907 } 908 rb_link_node(&new->rq_recv, n, p); 909 rb_insert_color(&new->rq_recv, &xprt->recv_queue); 910 } 911 912 static void 913 xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req) 914 { 915 rb_erase(&req->rq_recv, &xprt->recv_queue); 916 } 917 918 /** 919 * xprt_lookup_rqst - find an RPC request corresponding to an XID 920 * @xprt: transport on which the original request was transmitted 921 * @xid: RPC XID of incoming reply 922 * 923 * Caller holds xprt->queue_lock. 924 */ 925 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 926 { 927 struct rpc_rqst *entry; 928 929 entry = xprt_request_rb_find(xprt, xid); 930 if (entry != NULL) { 931 trace_xprt_lookup_rqst(xprt, xid, 0); 932 entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); 933 return entry; 934 } 935 936 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 937 ntohl(xid)); 938 trace_xprt_lookup_rqst(xprt, xid, -ENOENT); 939 xprt->stat.bad_xids++; 940 return NULL; 941 } 942 EXPORT_SYMBOL_GPL(xprt_lookup_rqst); 943 944 static bool 945 xprt_is_pinned_rqst(struct rpc_rqst *req) 946 { 947 return atomic_read(&req->rq_pin) != 0; 948 } 949 950 /** 951 * xprt_pin_rqst - Pin a request on the transport receive list 952 * @req: Request to pin 953 * 954 * Caller must ensure this is atomic with the call to xprt_lookup_rqst() 955 * so should be holding xprt->queue_lock. 956 */ 957 void xprt_pin_rqst(struct rpc_rqst *req) 958 { 959 atomic_inc(&req->rq_pin); 960 } 961 EXPORT_SYMBOL_GPL(xprt_pin_rqst); 962 963 /** 964 * xprt_unpin_rqst - Unpin a request on the transport receive list 965 * @req: Request to pin 966 * 967 * Caller should be holding xprt->queue_lock. 
968 */ 969 void xprt_unpin_rqst(struct rpc_rqst *req) 970 { 971 if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) { 972 atomic_dec(&req->rq_pin); 973 return; 974 } 975 if (atomic_dec_and_test(&req->rq_pin)) 976 wake_up_var(&req->rq_pin); 977 } 978 EXPORT_SYMBOL_GPL(xprt_unpin_rqst); 979 980 static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) 981 { 982 wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req)); 983 } 984 985 static bool 986 xprt_request_data_received(struct rpc_task *task) 987 { 988 return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && 989 READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0; 990 } 991 992 static bool 993 xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req) 994 { 995 return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) && 996 READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0; 997 } 998 999 /** 1000 * xprt_request_enqueue_receive - Add an request to the receive queue 1001 * @task: RPC task 1002 * 1003 */ 1004 void 1005 xprt_request_enqueue_receive(struct rpc_task *task) 1006 { 1007 struct rpc_rqst *req = task->tk_rqstp; 1008 struct rpc_xprt *xprt = req->rq_xprt; 1009 1010 if (!xprt_request_need_enqueue_receive(task, req)) 1011 return; 1012 spin_lock(&xprt->queue_lock); 1013 1014 /* Update the softirq receive buffer */ 1015 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1016 sizeof(req->rq_private_buf)); 1017 1018 /* Add request to the receive list */ 1019 xprt_request_rb_insert(xprt, req); 1020 set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate); 1021 spin_unlock(&xprt->queue_lock); 1022 1023 /* Turn off autodisconnect */ 1024 del_singleshot_timer_sync(&xprt->timer); 1025 } 1026 1027 /** 1028 * xprt_request_dequeue_receive_locked - Remove a request from the receive queue 1029 * @task: RPC task 1030 * 1031 * Caller must hold xprt->queue_lock. 
 */
static void
xprt_request_dequeue_receive_locked(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;

	/* Only unlink from the receive rb-tree if we were actually queued */
	if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
		xprt_request_rb_remove(req->rq_xprt, req);
}

/**
 * xprt_update_rtt - Update RPC RTT statistics
 * @task: RPC request that recently completed
 *
 * Caller holds xprt->queue_lock.
 */
void xprt_update_rtt(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
	unsigned int timer = task->tk_msg.rpc_proc->p_timer;
	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));

	if (timer) {
		/* Only a first transmission yields a clean RTT sample */
		if (req->rq_ntrans == 1)
			rpc_update_rtt(rtt, timer, m);
		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
	}
}
EXPORT_SYMBOL_GPL(xprt_update_rtt);

/**
 * xprt_complete_rqst - called when reply processing is complete
 * @task: RPC request that recently completed
 * @copied: actual number of bytes received from the transport
 *
 * Caller holds xprt->queue_lock.
 */
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
		task->tk_pid, ntohl(req->rq_xid), copied);
	trace_xprt_complete_rqst(xprt, req->rq_xid, copied);

	xprt->stat.recvs++;

	req->rq_private_buf.len = copied;
	/* Ensure all writes are done before we update */
	/* req->rq_reply_bytes_recvd */
	smp_wmb();
	req->rq_reply_bytes_recvd = copied;
	xprt_request_dequeue_receive_locked(task);
	rpc_wake_up_queued_task(&xprt->pending, task);
}
EXPORT_SYMBOL_GPL(xprt_complete_rqst);

/*
 * RPC wait-queue timeout callback: decide whether the timeout is real
 * (no reply bytes received yet) or stale (a reply raced in).
 */
static void xprt_timer(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	if (task->tk_status != -ETIMEDOUT)
		return;

	trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
	if (!req->rq_reply_bytes_recvd) {
		/* Let the transport adjust its timeout state, if it cares */
		if (xprt->ops->timer)
			xprt->ops->timer(xprt, task);
	} else
		task->tk_status = 0;
}

/**
 * xprt_wait_for_reply_request_def - wait for reply
 * @task: pointer to rpc_task
 *
 * Set a request's retransmit timeout based on the transport's
 * default timeout parameters. Used by transports that don't adjust
 * the retransmit timeout based on round-trip time estimation,
 * and put the task to sleep on the pending queue.
 */
void xprt_wait_for_reply_request_def(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;

	rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
			xprt_request_timeout(req));
}
EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);

/**
 * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
 * @task: pointer to rpc_task
 *
 * Set a request's retransmit timeout using the RTT estimator,
 * and put the task to sleep on the pending queue.
 */
void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
{
	int timer = task->tk_msg.rpc_proc->p_timer;
	struct rpc_clnt *clnt = task->tk_client;
	struct rpc_rtt *rtt = clnt->cl_rtt;
	struct rpc_rqst *req = task->tk_rqstp;
	unsigned long max_timeout = clnt->cl_timeout->to_maxval;
	unsigned long timeout;

	/* Exponential backoff on retries, clamped to to_maxval */
	timeout = rpc_calc_rto(rtt, timer);
	timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
	if (timeout > max_timeout || timeout == 0)
		timeout = max_timeout;
	rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
			jiffies + timeout);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);

/**
 * xprt_request_wait_receive - wait for the reply to an RPC request
 * @task: RPC task about to send a request
 *
 */
void xprt_request_wait_receive(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
		return;
	/*
	 * Sleep on the pending queue if we're expecting a reply.
	 * The spinlock ensures atomicity between the test of
	 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
	 */
	spin_lock(&xprt->queue_lock);
	/* Re-test under the lock: a reply may have just been processed */
	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
		xprt->ops->wait_for_reply_request(task);
		/*
		 * Send an extra queue wakeup call if the
		 * connection was dropped in case the call to
		 * rpc_sleep_on() raced.
		 */
		if (xprt_request_retransmit_after_disconnect(task))
			rpc_wake_up_queued_task_set_status(&xprt->pending,
					task, -ENOTCONN);
	}
	spin_unlock(&xprt->queue_lock);
}

/* A task needs enqueueing only if it is not already on the xmit queue */
static bool
xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
{
	return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
}

/**
 * xprt_request_enqueue_transmit - queue a task for transmission
 * @task: pointer to rpc_task
 *
 * Add a task to the transmission queue.
 */
void
xprt_request_enqueue_transmit(struct rpc_task *task)
{
	struct rpc_rqst *pos, *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	if (xprt_request_need_enqueue_transmit(task, req)) {
		req->rq_bytes_sent = 0;
		spin_lock(&xprt->queue_lock);
		/*
		 * Requests that carry congestion control credits are added
		 * to the head of the list to avoid starvation issues.
		 */
		if (req->rq_cong) {
			xprt_clear_congestion_window_wait(xprt);
			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
				if (pos->rq_cong)
					continue;
				/* Note: req is added _before_ pos */
				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
				INIT_LIST_HEAD(&req->rq_xmit2);
				trace_xprt_enq_xmit(task, 1);
				goto out;
			}
		} else if (RPC_IS_SWAPPER(task)) {
			/* Swap-out tasks jump ahead of untransmitted non-swap
			 * requests so memory reclaim is not delayed */
			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
				if (pos->rq_cong || pos->rq_bytes_sent)
					continue;
				if (RPC_IS_SWAPPER(pos->rq_task))
					continue;
				/* Note: req is added _before_ pos */
				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
				INIT_LIST_HEAD(&req->rq_xmit2);
				trace_xprt_enq_xmit(task, 2);
				goto out;
			}
		} else if (!req->rq_seqno) {
			/* Group with an existing request from the same owner
			 * via the secondary rq_xmit2 list */
			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
				if (pos->rq_task->tk_owner != task->tk_owner)
					continue;
				list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
				INIT_LIST_HEAD(&req->rq_xmit);
				trace_xprt_enq_xmit(task, 3);
				goto out;
			}
		}
		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
		INIT_LIST_HEAD(&req->rq_xmit2);
		trace_xprt_enq_xmit(task, 4);
out:
		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
		spin_unlock(&xprt->queue_lock);
	}
}

/**
 * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
 * @task: pointer to rpc_task
 *
 * Remove a task from the transmission queue
 * Caller must hold xprt->queue_lock
 */
static void
xprt_request_dequeue_transmit_locked(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;

	if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
		return;
	if (!list_empty(&req->rq_xmit)) {
		list_del(&req->rq_xmit);
		/* Promote the next grouped request onto the main queue */
		if (!list_empty(&req->rq_xmit2)) {
			struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
					struct rpc_rqst, rq_xmit2);

			list_del(&req->rq_xmit2);
			list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
		}
	} else
		list_del(&req->rq_xmit2);
}

/**
 * xprt_request_dequeue_transmit - remove a task from the transmission queue
 * @task: pointer to rpc_task
 *
 * Remove a task from the transmission queue
 */
static void
xprt_request_dequeue_transmit(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	spin_lock(&xprt->queue_lock);
	xprt_request_dequeue_transmit_locked(task);
	spin_unlock(&xprt->queue_lock);
}

/**
 * xprt_request_prepare - prepare an encoded request for transport
 * @req: pointer to rpc_rqst
 *
 * Calls into the transport layer to do whatever is needed to prepare
 * the request for transmission or receive.
 */
void
xprt_request_prepare(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;

	if (xprt->ops->prepare_request)
		xprt->ops->prepare_request(req);
}

/**
 * xprt_request_need_retransmit - Test if a task needs retransmission
 * @task: pointer to rpc_task
 *
 * Test for whether a connection breakage requires the task to retransmit
 */
bool
xprt_request_need_retransmit(struct rpc_task *task)
{
	return xprt_request_retransmit_after_disconnect(task);
}

/**
 * xprt_prepare_transmit - reserve the transport before sending a request
 * @task: RPC task about to send a request
 *
 */
bool xprt_prepare_transmit(struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;

	dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);

	if (!xprt_lock_write(xprt, task)) {
		/* Race breaker: someone may have transmitted us */
		if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
			rpc_wake_up_queued_task_set_status(&xprt->sending,
					task, 0);
		return false;

	}
	return true;
}

/* Release the write lock taken in xprt_prepare_transmit() */
void xprt_end_transmit(struct rpc_task *task)
{
	xprt_release_write(task->tk_rqstp->rq_xprt, task);
}

/**
 * xprt_request_transmit - send an RPC request on a transport
 * @req: pointer to request to transmit
 * @snd_task: RPC task that owns the transport lock
 *
 * This performs the transmission of a single request.
 * Note that if the request is not the same as snd_task, then it
 * does need to be pinned.
 * Returns '0' on success.
 */
static int
xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct rpc_task *task = req->rq_task;
	unsigned int connect_cookie;
	int is_retrans = RPC_WAS_SENT(task);
	int status;

	if (!req->rq_bytes_sent) {
		/* A reply may already have arrived: nothing left to send */
		if (xprt_request_data_received(task)) {
			status = 0;
			goto out_dequeue;
		}
		/* Verify that our message lies in the RPCSEC_GSS window */
		if (rpcauth_xmit_need_reencode(task)) {
			status = -EBADMSG;
			goto out_dequeue;
		}
		if (task->tk_ops->rpc_call_prepare_transmit) {
			task->tk_ops->rpc_call_prepare_transmit(task,
					task->tk_calldata);
			status = task->tk_status;
			if (status < 0)
				goto out_dequeue;
		}
		if (RPC_SIGNALLED(task)) {
			status = -ERESTARTSYS;
			goto out_dequeue;
		}
	}

	/*
	 * Update req->rq_ntrans before transmitting to avoid races with
	 * xprt_update_rtt(), which needs to know that it is recording a
	 * reply to the first transmission.
	 */
	req->rq_ntrans++;

	connect_cookie = xprt->connect_cookie;
	status = xprt->ops->send_request(req);
	if (status != 0) {
		/* Transmission failed: roll back the transmission count and
		 * leave the request on the xmit queue for a retry */
		req->rq_ntrans--;
		trace_xprt_transmit(req, status);
		return status;
	}

	if (is_retrans)
		task->tk_client->cl_stats->rpcretrans++;

	xprt_inject_disconnect(xprt);

	task->tk_flags |= RPC_TASK_SENT;
	spin_lock(&xprt->transport_lock);

	xprt->stat.sends++;
	xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
	xprt->stat.bklog_u += xprt->backlog.qlen;
	xprt->stat.sending_u += xprt->sending.qlen;
	xprt->stat.pending_u += xprt->pending.qlen;
	spin_unlock(&xprt->transport_lock);

	req->rq_connect_cookie = connect_cookie;
out_dequeue:
	trace_xprt_transmit(req, status);
	xprt_request_dequeue_transmit(task);
	rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
	return status;
}

/**
 * xprt_transmit - send an RPC request on a transport
 * @task: controlling RPC task
 *
 * Attempts to drain the transmit queue. On exit, either the transport
 * signalled an error that needs to be handled before transmission can
 * resume, or @task finished transmitting, and detected that it already
 * received a reply.
 */
void
xprt_transmit(struct rpc_task *task)
{
	struct rpc_rqst *next, *req = task->tk_rqstp;
	struct rpc_xprt *xprt = req->rq_xprt;
	int status;

	spin_lock(&xprt->queue_lock);
	while (!list_empty(&xprt->xmit_queue)) {
		next = list_first_entry(&xprt->xmit_queue,
				struct rpc_rqst, rq_xmit);
		/* Pin so the rqst can't vanish while queue_lock is dropped */
		xprt_pin_rqst(next);
		spin_unlock(&xprt->queue_lock);
		status = xprt_request_transmit(next, task);
		/* Only @task's own encoding errors should abort the drain */
		if (status == -EBADMSG && next != req)
			status = 0;
		cond_resched();
		spin_lock(&xprt->queue_lock);
		xprt_unpin_rqst(next);
		if (status == 0) {
			if (!xprt_request_data_received(task) ||
			    test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
				continue;
		} else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
			task->tk_status = status;
		break;
	}
	spin_unlock(&xprt->queue_lock);
}

/* Caller holds xprt->reserve_lock (see xprt_alloc_slot) */
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
{
	set_bit(XPRT_CONGESTED, &xprt->state);
	rpc_sleep_on(&xprt->backlog, task, NULL);
}

static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
{
	/* Clear the congestion marker once the backlog queue drains */
	if (rpc_wake_up_next(&xprt->backlog) == NULL)
		clear_bit(XPRT_CONGESTED, &xprt->state);
}

static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
{
	bool ret = false;

	/* Cheap lockless check first, then re-test under reserve_lock */
	if (!test_bit(XPRT_CONGESTED, &xprt->state))
		goto out;
	spin_lock(&xprt->reserve_lock);
	if (test_bit(XPRT_CONGESTED, &xprt->state)) {
		rpc_sleep_on(&xprt->backlog, task, NULL);
		ret = true;
	}
	spin_unlock(&xprt->reserve_lock);
out:
	return ret;
}

/* Caller holds xprt->reserve_lock; it is dropped across the allocation
 * because kzalloc(GFP_NOFS) may sleep, and retaken before returning. */
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
	struct rpc_rqst *req = ERR_PTR(-EAGAIN);

	if (xprt->num_reqs >= xprt->max_reqs)
		goto out;
	++xprt->num_reqs;
	spin_unlock(&xprt->reserve_lock);
	req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
	spin_lock(&xprt->reserve_lock);
	if (req != NULL)
		goto out;
	--xprt->num_reqs;
	req = ERR_PTR(-ENOMEM);
out:
	return req;
}

/* Caller holds xprt->reserve_lock. Frees @req only while the slot count
 * stays above the preallocated minimum. */
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	if (xprt->num_reqs > xprt->min_reqs) {
		--xprt->num_reqs;
		kfree(req);
		return true;
	}
	return false;
}

void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpc_rqst *req;

	spin_lock(&xprt->reserve_lock);
	if (!list_empty(&xprt->free)) {
		req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
		list_del(&req->rq_list);
		goto out_init_req;
	}
	req = xprt_dynamic_alloc_slot(xprt);
	if (!IS_ERR(req))
		goto out_init_req;
	switch (PTR_ERR(req)) {
	case -ENOMEM:
		dprintk("RPC: dynamic allocation of request slot "
				"failed! Retrying\n");
		task->tk_status = -ENOMEM;
		break;
	case -EAGAIN:
		xprt_add_backlog(xprt, task);
		dprintk("RPC: waiting for request slot\n");
		/* fall through */
	default:
		task->tk_status = -EAGAIN;
	}
	spin_unlock(&xprt->reserve_lock);
	return;
out_init_req:
	xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
				     xprt->num_reqs);
	spin_unlock(&xprt->reserve_lock);

	task->tk_status = 0;
	task->tk_rqstp = req;
}
EXPORT_SYMBOL_GPL(xprt_alloc_slot);

void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	spin_lock(&xprt->reserve_lock);
	if (!xprt_dynamic_free_slot(xprt, req)) {
		memset(req, 0, sizeof(*req));	/* mark unused */
		list_add(&req->rq_list, &xprt->free);
	}
	xprt_wake_up_backlog(xprt);
	spin_unlock(&xprt->reserve_lock);
}
EXPORT_SYMBOL_GPL(xprt_free_slot);

/* Release every slot on the free list; used during transport teardown */
static void xprt_free_all_slots(struct rpc_xprt *xprt)
{
	struct rpc_rqst *req;
	while (!list_empty(&xprt->free)) {
		req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
		list_del(&req->rq_list);
		kfree(req);
	}
}

struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
		unsigned int num_prealloc,
		unsigned int max_alloc)
{
	struct rpc_xprt *xprt;
	struct rpc_rqst *req;
	int i;

	xprt = kzalloc(size, GFP_KERNEL);
	if (xprt == NULL)
		goto out;

	xprt_init(xprt, net);

	for (i = 0; i < num_prealloc; i++) {
		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
		if (!req)
			goto out_free;
		list_add(&req->rq_list, &xprt->free);
	}
	if (max_alloc > num_prealloc)
		xprt->max_reqs = max_alloc;
	else
		xprt->max_reqs = num_prealloc;
	xprt->min_reqs = num_prealloc;
	xprt->num_reqs = num_prealloc;

	return xprt;

out_free:
	/* xprt_free() releases the net reference and any slots added so far */
	xprt_free(xprt);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(xprt_alloc);

void xprt_free(struct rpc_xprt *xprt)
{
	put_net(xprt->xprt_net);
	xprt_free_all_slots(xprt);
	/* RCU-delayed free: readers may still hold an rcu_read_lock ref */
	kfree_rcu(xprt, rcu);
}
EXPORT_SYMBOL_GPL(xprt_free);

/* Start one behind the transport's cookie so the first connect is seen
 * as a state change by the request */
static void
xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
{
	req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
}

static __be32
xprt_alloc_xid(struct rpc_xprt *xprt)
{
	__be32 xid;

	spin_lock(&xprt->reserve_lock);
	xid = (__force __be32)xprt->xid++;
	spin_unlock(&xprt->reserve_lock);
	return xid;
}

/* Randomize the initial XID so streams don't collide across reboots */
static void
xprt_init_xid(struct rpc_xprt *xprt)
{
	xprt->xid = prandom_u32();
}

static void
xprt_request_init(struct rpc_task *task)
{
	struct rpc_xprt *xprt = task->tk_xprt;
	struct rpc_rqst *req = task->tk_rqstp;

	req->rq_task	= task;
	req->rq_xprt    = xprt;
	req->rq_buffer  = NULL;
	req->rq_xid	= xprt_alloc_xid(xprt);
	xprt_init_connect_cookie(req, xprt);
	req->rq_snd_buf.len = 0;
	req->rq_snd_buf.buflen = 0;
	req->rq_rcv_buf.len = 0;
	req->rq_rcv_buf.buflen = 0;
	req->rq_snd_buf.bvec = NULL;
	req->rq_rcv_buf.bvec = NULL;
	req->rq_release_snd_buf = NULL;
	xprt_init_majortimeo(task, req);
	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
			req, ntohl(req->rq_xid));
}

static void
xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
{
	xprt->ops->alloc_slot(xprt, task);
	if (task->tk_rqstp != NULL)
		xprt_request_init(task);
}

/**
 * xprt_reserve - allocate an RPC request slot
 * @task: RPC task requesting a slot allocation
 *
 * If the transport is marked as being congested, or if no more
 * slots are available, place the task on the transport's
 * backlog queue.
 */
void xprt_reserve(struct rpc_task *task)
{
	struct rpc_xprt *xprt = task->tk_xprt;

	task->tk_status = 0;
	if (task->tk_rqstp != NULL)
		return;

	task->tk_status = -EAGAIN;
	if (!xprt_throttle_congested(xprt, task))
		xprt_do_reserve(xprt, task);
}

/**
 * xprt_retry_reserve - allocate an RPC request slot
 * @task: RPC task requesting a slot allocation
 *
 * If no more slots are available, place the task on the transport's
 * backlog queue.
 * Note that the only difference with xprt_reserve is that we now
 * ignore the value of the XPRT_CONGESTED flag.
 */
void xprt_retry_reserve(struct rpc_task *task)
{
	struct rpc_xprt *xprt = task->tk_xprt;

	task->tk_status = 0;
	if (task->tk_rqstp != NULL)
		return;

	task->tk_status = -EAGAIN;
	xprt_do_reserve(xprt, task);
}

/* Remove @req from the transmit and receive queues, and wait for any
 * in-flight receive handler that has the request pinned to drop it. */
static void
xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;

	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
	    test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
	    xprt_is_pinned_rqst(req)) {
		spin_lock(&xprt->queue_lock);
		xprt_request_dequeue_transmit_locked(task);
		xprt_request_dequeue_receive_locked(task);
		while (xprt_is_pinned_rqst(req)) {
			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
			spin_unlock(&xprt->queue_lock);
			xprt_wait_on_pinned_rqst(req);
			spin_lock(&xprt->queue_lock);
			clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
		}
		spin_unlock(&xprt->queue_lock);
	}
}

/**
 * xprt_release - release an RPC request slot
 * @task: task which is finished with the slot
 *
 */
void xprt_release(struct rpc_task *task)
{
	struct rpc_xprt *xprt;
	struct rpc_rqst *req = task->tk_rqstp;

	if (req == NULL) {
		/* No slot was allocated: just drop any write lock held */
		if (task->tk_client) {
			xprt = task->tk_xprt;
			xprt_release_write(xprt, task);
		}
		return;
	}

	xprt = req->rq_xprt;
	xprt_request_dequeue_all(task, req);
	spin_lock(&xprt->transport_lock);
	xprt->ops->release_xprt(xprt, task);
	if (xprt->ops->release_request)
		xprt->ops->release_request(task);
	xprt_schedule_autodisconnect(xprt);
	spin_unlock(&xprt->transport_lock);
	if (req->rq_buffer)
		xprt->ops->buf_free(task);
	xprt_inject_disconnect(xprt);
	xdr_free_bvec(&req->rq_rcv_buf);
	xdr_free_bvec(&req->rq_snd_buf);
	if (req->rq_cred != NULL)
		put_rpccred(req->rq_cred);
	task->tk_rqstp = NULL;
	if (req->rq_release_snd_buf)
		req->rq_release_snd_buf(req);

	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
	/* Backchannel-preallocated requests go back to the bc pool */
	if (likely(!bc_prealloc(req)))
		xprt->ops->free_slot(xprt, req);
	else
		xprt_free_bc_request(req);
}

#ifdef CONFIG_SUNRPC_BACKCHANNEL
void
xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
{
	struct xdr_buf *xbufp = &req->rq_snd_buf;

	task->tk_rqstp = req;
	req->rq_task = task;
	xprt_init_connect_cookie(req, req->rq_xprt);
	/*
	 * Set up the xdr_buf length.
	 * This also indicates that the buffer is XDR encoded already.
	 */
	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
		xbufp->tail[0].iov_len;
}
#endif

/* One-time initialization of a freshly zeroed rpc_xprt */
static void xprt_init(struct rpc_xprt *xprt, struct net *net)
{
	kref_init(&xprt->kref);

	spin_lock_init(&xprt->transport_lock);
	spin_lock_init(&xprt->reserve_lock);
	spin_lock_init(&xprt->queue_lock);

	INIT_LIST_HEAD(&xprt->free);
	xprt->recv_queue = RB_ROOT;
	INIT_LIST_HEAD(&xprt->xmit_queue);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	spin_lock_init(&xprt->bc_pa_lock);
	INIT_LIST_HEAD(&xprt->bc_pa_list);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
	INIT_LIST_HEAD(&xprt->xprt_switch);

	xprt->last_used = jiffies;
	xprt->cwnd = RPC_INITCWND;
	xprt->bind_index = 0;

	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");

	xprt_init_xid(xprt);

	xprt->xprt_net = get_net(net);
}

/**
 * xprt_create_transport - create an RPC transport
 * @args: rpc transport creation arguments
 *
 */
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
	struct rpc_xprt *xprt;
	struct xprt_class *t;

	/* Look up the registered transport class matching args->ident */
	spin_lock(&xprt_list_lock);
	list_for_each_entry(t, &xprt_list, list) {
		if (t->ident == args->ident) {
			spin_unlock(&xprt_list_lock);
			goto found;
		}
	}
	spin_unlock(&xprt_list_lock);
	dprintk("RPC: transport (%d) not supported\n", args->ident);
	return ERR_PTR(-EIO);

found:
	xprt = t->setup(args);
	if (IS_ERR(xprt)) {
		dprintk("RPC: xprt_create_transport: failed, %ld\n",
				-PTR_ERR(xprt));
		goto out;
	}
	if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
		xprt->idle_timeout = 0;
	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
	if (xprt_has_timer(xprt))
		timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
	else
		timer_setup(&xprt->timer, NULL, 0);

	if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
		xprt_destroy(xprt);
		return ERR_PTR(-EINVAL);
	}
	xprt->servername = kstrdup(args->servername, GFP_KERNEL);
	if (xprt->servername == NULL) {
		xprt_destroy(xprt);
		return ERR_PTR(-ENOMEM);
	}

	rpc_xprt_debugfs_register(xprt);

	dprintk("RPC: created transport %p with %u slots\n", xprt,
			xprt->max_reqs);
out:
	return xprt;
}

/* Deferred teardown work item scheduled by xprt_destroy() */
static void xprt_destroy_cb(struct work_struct *work)
{
	struct rpc_xprt *xprt =
		container_of(work, struct rpc_xprt, task_cleanup);

	rpc_xprt_debugfs_unregister(xprt);
	rpc_destroy_wait_queue(&xprt->binding);
	rpc_destroy_wait_queue(&xprt->pending);
	rpc_destroy_wait_queue(&xprt->sending);
	rpc_destroy_wait_queue(&xprt->backlog);
	kfree(xprt->servername);
	/*
	 * Tear down transport state and free the rpc_xprt
	 */
	xprt->ops->destroy(xprt);
}

/**
 * xprt_destroy - destroy an RPC transport, killing off all requests.
 * @xprt: transport to destroy
 *
 */
static void xprt_destroy(struct rpc_xprt *xprt)
{
	dprintk("RPC: destroying transport %p\n", xprt);

	/*
	 * Exclude transport connect/disconnect handlers and autoclose
	 */
	wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);

	del_timer_sync(&xprt->timer);

	/*
	 * Destroy sockets etc from the system workqueue so they can
	 * safely flush receive work running on rpciod.
	 */
	INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb);
	schedule_work(&xprt->task_cleanup);
}

/* kref release callback: final reference dropped */
static void xprt_destroy_kref(struct kref *kref)
{
	xprt_destroy(container_of(kref, struct rpc_xprt, kref));
}

/**
 * xprt_get - return a reference to an RPC transport.
 * @xprt: pointer to the transport
 *
 */
struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
{
	if (xprt != NULL && kref_get_unless_zero(&xprt->kref))
		return xprt;
	return NULL;
}
EXPORT_SYMBOL_GPL(xprt_get);

/**
 * xprt_put - release a reference to an RPC transport.
 * @xprt: pointer to the transport
 *
 */
void xprt_put(struct rpc_xprt *xprt)
{
	if (xprt != NULL)
		kref_put(&xprt->kref, xprt_destroy_kref);
}
EXPORT_SYMBOL_GPL(xprt_put);