1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Multipath support for RPC 4 * 5 * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. 6 * 7 * Trond Myklebust <trond.myklebust@primarydata.com> 8 * 9 */ 10 #include <linux/atomic.h> 11 #include <linux/types.h> 12 #include <linux/kref.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/rculist.h> 16 #include <linux/slab.h> 17 #include <linux/spinlock.h> 18 #include <linux/sunrpc/xprt.h> 19 #include <linux/sunrpc/addr.h> 20 #include <linux/sunrpc/xprtmultipath.h> 21 22 #include "sysfs.h" 23 24 typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, 25 const struct rpc_xprt *cur); 26 27 static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; 28 static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; 29 static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; 30 static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; 31 32 static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, 33 struct rpc_xprt *xprt) 34 { 35 if (unlikely(xprt_get(xprt) == NULL)) 36 return; 37 list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); 38 smp_wmb(); 39 if (xps->xps_nxprts == 0) 40 xps->xps_net = xprt->xprt_net; 41 xps->xps_nxprts++; 42 xps->xps_nactive++; 43 } 44 45 /** 46 * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch 47 * @xps: pointer to struct rpc_xprt_switch 48 * @xprt: pointer to struct rpc_xprt 49 * 50 * Adds xprt to the end of the list of struct rpc_xprt in xps. 51 */ 52 void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, 53 struct rpc_xprt *xprt) 54 { 55 if (xprt == NULL) 56 return; 57 spin_lock(&xps->xps_lock); 58 if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) 59 xprt_switch_add_xprt_locked(xps, xprt); 60 spin_unlock(&xps->xps_lock); 61 rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); 62 } 63 64 static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, 65 struct rpc_xprt *xprt) 66 { 67 if (unlikely(xprt == NULL)) 68 return; 69 if (!test_bit(XPRT_OFFLINE, &xprt->state)) 70 xps->xps_nactive--; 71 xps->xps_nxprts--; 72 if (xps->xps_nxprts == 0) 73 xps->xps_net = NULL; 74 smp_wmb(); 75 list_del_rcu(&xprt->xprt_switch); 76 } 77 78 /** 79 * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch 80 * @xps: pointer to struct rpc_xprt_switch 81 * @xprt: pointer to struct rpc_xprt 82 * 83 * Removes xprt from the list of struct rpc_xprt in xps. 84 */ 85 void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, 86 struct rpc_xprt *xprt) 87 { 88 spin_lock(&xps->xps_lock); 89 xprt_switch_remove_xprt_locked(xps, xprt); 90 spin_unlock(&xps->xps_lock); 91 xprt_put(xprt); 92 } 93 94 static DEFINE_IDA(rpc_xprtswitch_ids); 95 96 void xprt_multipath_cleanup_ids(void) 97 { 98 ida_destroy(&rpc_xprtswitch_ids); 99 } 100 101 static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) 102 { 103 int id; 104 105 id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); 106 if (id < 0) 107 return id; 108 109 xps->xps_id = id; 110 return 0; 111 } 112 113 static void xprt_switch_free_id(struct rpc_xprt_switch *xps) 114 { 115 ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); 116 } 117 118 /** 119 * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch 120 * @xprt: pointer to struct rpc_xprt 121 * @gfp_flags: allocation flags 122 * 123 * On success, returns an initialised struct rpc_xprt_switch, containing 124 * the entry xprt. Returns NULL on failure. 125 */ 126 struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, 127 gfp_t gfp_flags) 128 { 129 struct rpc_xprt_switch *xps; 130 131 xps = kmalloc(sizeof(*xps), gfp_flags); 132 if (xps != NULL) { 133 spin_lock_init(&xps->xps_lock); 134 kref_init(&xps->xps_kref); 135 xprt_switch_alloc_id(xps, gfp_flags); 136 xps->xps_nxprts = xps->xps_nactive = 0; 137 atomic_long_set(&xps->xps_queuelen, 0); 138 xps->xps_net = NULL; 139 INIT_LIST_HEAD(&xps->xps_xprt_list); 140 xps->xps_iter_ops = &rpc_xprt_iter_singular; 141 rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); 142 xprt_switch_add_xprt_locked(xps, xprt); 143 xps->xps_nunique_destaddr_xprts = 1; 144 rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); 145 } 146 147 return xps; 148 } 149 150 static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) 151 { 152 spin_lock(&xps->xps_lock); 153 while (!list_empty(&xps->xps_xprt_list)) { 154 struct rpc_xprt *xprt; 155 156 xprt = list_first_entry(&xps->xps_xprt_list, 157 struct rpc_xprt, xprt_switch); 158 xprt_switch_remove_xprt_locked(xps, xprt); 159 spin_unlock(&xps->xps_lock); 160 xprt_put(xprt); 161 spin_lock(&xps->xps_lock); 162 } 163 spin_unlock(&xps->xps_lock); 164 } 165 166 static void xprt_switch_free(struct kref *kref) 167 { 168 struct rpc_xprt_switch *xps = container_of(kref, 169 struct rpc_xprt_switch, xps_kref); 170 171 xprt_switch_free_entries(xps); 172 rpc_sysfs_xprt_switch_destroy(xps); 173 xprt_switch_free_id(xps); 174 kfree_rcu(xps, xps_rcu); 175 } 176 177 /** 178 * xprt_switch_get - Return a reference to a rpc_xprt_switch 179 * @xps: pointer to struct rpc_xprt_switch 180 * 181 * Returns a reference to xps unless the refcount is already zero. 182 */ 183 struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) 184 { 185 if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) 186 return xps; 187 return NULL; 188 } 189 190 /** 191 * xprt_switch_put - Release a reference to a rpc_xprt_switch 192 * @xps: pointer to struct rpc_xprt_switch 193 * 194 * Release the reference to xps, and free it once the refcount is zero. 195 */ 196 void xprt_switch_put(struct rpc_xprt_switch *xps) 197 { 198 if (xps != NULL) 199 kref_put(&xps->xps_kref, xprt_switch_free); 200 } 201 202 /** 203 * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch 204 * @xps: pointer to struct rpc_xprt_switch 205 * 206 * Sets a round-robin default policy for iterators acting on xps. 207 */ 208 void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) 209 { 210 if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) 211 WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); 212 } 213 214 static 215 const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) 216 { 217 if (xpi->xpi_ops != NULL) 218 return xpi->xpi_ops; 219 return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; 220 } 221 222 static 223 void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) 224 { 225 } 226 227 static 228 void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) 229 { 230 WRITE_ONCE(xpi->xpi_cursor, NULL); 231 } 232 233 static 234 bool xprt_is_active(const struct rpc_xprt *xprt) 235 { 236 return (kref_read(&xprt->kref) != 0 && 237 !test_bit(XPRT_OFFLINE, &xprt->state)); 238 } 239 240 static 241 struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) 242 { 243 struct rpc_xprt *pos; 244 245 list_for_each_entry_rcu(pos, head, xprt_switch) { 246 if (xprt_is_active(pos)) 247 return pos; 248 } 249 return NULL; 250 } 251 252 static 253 struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) 254 { 255 struct rpc_xprt *pos; 256 257 list_for_each_entry_rcu(pos, head, xprt_switch) { 258 if (!xprt_is_active(pos)) 259 return pos; 260 } 261 return NULL; 262 } 263 264 static 265 struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) 266 { 267 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 268 269 if (xps == NULL) 270 return NULL; 271 return xprt_switch_find_first_entry(&xps->xps_xprt_list); 272 } 273 274 static 275 struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, 276 const struct rpc_xprt *cur, 277 bool find_active) 278 { 279 struct rpc_xprt *pos; 280 bool found = false; 281 282 list_for_each_entry_rcu(pos, head, xprt_switch) { 283 if (cur == pos) 284 found = true; 285 if (found && ((find_active && xprt_is_active(pos)) || 286 (!find_active && xprt_is_active(pos)))) 287 return pos; 288 } 289 return NULL; 290 } 291 292 static 293 struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, 294 const struct rpc_xprt *cur) 295 { 296 return _xprt_switch_find_current_entry(head, cur, true); 297 } 298 299 static 300 struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, 301 struct rpc_xprt *first_entry(struct list_head *head), 302 struct rpc_xprt *current_entry(struct list_head *head, 303 const struct rpc_xprt *cur)) 304 { 305 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 306 struct list_head *head; 307 308 if (xps == NULL) 309 return NULL; 310 head = &xps->xps_xprt_list; 311 if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) 312 return first_entry(head); 313 return current_entry(head, xpi->xpi_cursor); 314 } 315 316 static 317 struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) 318 { 319 return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, 320 xprt_switch_find_current_entry); 321 } 322 323 static 324 struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, 325 const struct rpc_xprt *cur) 326 { 327 return _xprt_switch_find_current_entry(head, cur, false); 328 } 329 330 static 331 struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) 332 { 333 return _xprt_iter_current_entry(xpi, 334 xprt_switch_find_first_entry_offline, 335 xprt_switch_find_current_entry_offline); 336 } 337 338 bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, 339 const struct sockaddr *sap) 340 { 341 struct list_head *head; 342 struct rpc_xprt *pos; 343 344 if (xps == NULL || sap == NULL) 345 return false; 346 347 head = &xps->xps_xprt_list; 348 list_for_each_entry_rcu(pos, head, xprt_switch) { 349 if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { 350 pr_info("RPC: addr %s already in xprt switch\n", 351 pos->address_strings[RPC_DISPLAY_ADDR]); 352 return true; 353 } 354 } 355 return false; 356 } 357 358 static 359 struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, 360 const struct rpc_xprt *cur, bool check_active) 361 { 362 struct rpc_xprt *pos, *prev = NULL; 363 bool found = false; 364 365 list_for_each_entry_rcu(pos, head, xprt_switch) { 366 if (cur == prev) 367 found = true; 368 /* for request to return active transports return only 369 * active, for request to return offline transports 370 * return only offline 371 */ 372 if (found && ((check_active && xprt_is_active(pos)) || 373 (!check_active && !xprt_is_active(pos)))) 374 return pos; 375 prev = pos; 376 } 377 return NULL; 378 } 379 380 static 381 struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, 382 struct rpc_xprt **cursor, 383 xprt_switch_find_xprt_t find_next) 384 { 385 struct rpc_xprt *pos, *old; 386 387 old = smp_load_acquire(cursor); 388 pos = find_next(xps, old); 389 smp_store_release(cursor, pos); 390 return pos; 391 } 392 393 static 394 struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, 395 xprt_switch_find_xprt_t find_next) 396 { 397 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 398 399 if (xps == NULL) 400 return NULL; 401 return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next); 402 } 403 404 static 405 struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, 406 const struct rpc_xprt *cur) 407 { 408 struct rpc_xprt *ret; 409 410 ret = xprt_switch_find_next_entry(head, cur, true); 411 if (ret != NULL) 412 return ret; 413 return xprt_switch_find_first_entry(head); 414 } 415 416 static 417 struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, 418 const struct rpc_xprt *cur) 419 { 420 struct list_head *head = &xps->xps_xprt_list; 421 struct rpc_xprt *xprt; 422 unsigned int nactive; 423 424 for (;;) { 425 unsigned long xprt_queuelen, xps_queuelen; 426 427 xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); 428 if (!xprt) 429 break; 430 xprt_queuelen = atomic_long_read(&xprt->queuelen); 431 xps_queuelen = atomic_long_read(&xps->xps_queuelen); 432 nactive = READ_ONCE(xps->xps_nactive); 433 /* Exit loop if xprt_queuelen <= average queue length */ 434 if (xprt_queuelen * nactive <= xps_queuelen) 435 break; 436 cur = xprt; 437 } 438 return xprt; 439 } 440 441 static 442 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) 443 { 444 return xprt_iter_next_entry_multiple(xpi, 445 xprt_switch_find_next_entry_roundrobin); 446 } 447 448 static 449 struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, 450 const struct rpc_xprt *cur) 451 { 452 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); 453 } 454 455 static 456 struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, 457 const struct rpc_xprt *cur) 458 { 459 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); 460 } 461 462 static 463 struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) 464 { 465 return xprt_iter_next_entry_multiple(xpi, 466 xprt_switch_find_next_entry_all); 467 } 468 469 static 470 struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) 471 { 472 return xprt_iter_next_entry_multiple(xpi, 473 xprt_switch_find_next_entry_offline); 474 } 475 476 /* 477 * xprt_iter_rewind - Resets the xprt iterator 478 * @xpi: pointer to rpc_xprt_iter 479 * 480 * Resets xpi to ensure that it points to the first entry in the list 481 * of transports. 482 */ 483 static 484 void xprt_iter_rewind(struct rpc_xprt_iter *xpi) 485 { 486 rcu_read_lock(); 487 xprt_iter_ops(xpi)->xpi_rewind(xpi); 488 rcu_read_unlock(); 489 } 490 491 static void __xprt_iter_init(struct rpc_xprt_iter *xpi, 492 struct rpc_xprt_switch *xps, 493 const struct rpc_xprt_iter_ops *ops) 494 { 495 rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); 496 xpi->xpi_cursor = NULL; 497 xpi->xpi_ops = ops; 498 } 499 500 /** 501 * xprt_iter_init - Initialise an xprt iterator 502 * @xpi: pointer to rpc_xprt_iter 503 * @xps: pointer to rpc_xprt_switch 504 * 505 * Initialises the iterator to use the default iterator ops 506 * as set in xps. This function is mainly intended for internal 507 * use in the rpc_client. 508 */ 509 void xprt_iter_init(struct rpc_xprt_iter *xpi, 510 struct rpc_xprt_switch *xps) 511 { 512 __xprt_iter_init(xpi, xps, NULL); 513 } 514 515 /** 516 * xprt_iter_init_listall - Initialise an xprt iterator 517 * @xpi: pointer to rpc_xprt_iter 518 * @xps: pointer to rpc_xprt_switch 519 * 520 * Initialises the iterator to iterate once through the entire list 521 * of entries in xps. 522 */ 523 void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, 524 struct rpc_xprt_switch *xps) 525 { 526 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); 527 } 528 529 void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, 530 struct rpc_xprt_switch *xps) 531 { 532 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); 533 } 534 535 /** 536 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch 537 * @xpi: pointer to rpc_xprt_iter 538 * @newswitch: pointer to a new rpc_xprt_switch or NULL 539 * 540 * Swaps out the existing xpi->xpi_xpswitch with a new value. 541 */ 542 struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, 543 struct rpc_xprt_switch *newswitch) 544 { 545 struct rpc_xprt_switch __rcu *oldswitch; 546 547 /* Atomically swap out the old xpswitch */ 548 oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); 549 if (newswitch != NULL) 550 xprt_iter_rewind(xpi); 551 return rcu_dereference_protected(oldswitch, true); 552 } 553 554 /** 555 * xprt_iter_destroy - Destroys the xprt iterator 556 * @xpi: pointer to rpc_xprt_iter 557 */ 558 void xprt_iter_destroy(struct rpc_xprt_iter *xpi) 559 { 560 xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); 561 } 562 563 /** 564 * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor 565 * @xpi: pointer to rpc_xprt_iter 566 * 567 * Returns a pointer to the struct rpc_xprt that is currently 568 * pointed to by the cursor. 569 * Caller must be holding rcu_read_lock(). 570 */ 571 struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) 572 { 573 WARN_ON_ONCE(!rcu_read_lock_held()); 574 return xprt_iter_ops(xpi)->xpi_xprt(xpi); 575 } 576 577 static 578 struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, 579 struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) 580 { 581 struct rpc_xprt *ret; 582 583 do { 584 ret = fn(xpi); 585 if (ret == NULL) 586 break; 587 ret = xprt_get(ret); 588 } while (ret == NULL); 589 return ret; 590 } 591 592 /** 593 * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor 594 * @xpi: pointer to rpc_xprt_iter 595 * 596 * Returns a reference to the struct rpc_xprt that is currently 597 * pointed to by the cursor. 598 */ 599 struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi) 600 { 601 struct rpc_xprt *xprt; 602 603 rcu_read_lock(); 604 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt); 605 rcu_read_unlock(); 606 return xprt; 607 } 608 609 /** 610 * xprt_iter_get_next - Returns the next rpc_xprt following the cursor 611 * @xpi: pointer to rpc_xprt_iter 612 * 613 * Returns a reference to the struct rpc_xprt that immediately follows the 614 * entry pointed to by the cursor. 615 */ 616 struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) 617 { 618 struct rpc_xprt *xprt; 619 620 rcu_read_lock(); 621 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); 622 rcu_read_unlock(); 623 return xprt; 624 } 625 626 /* Policy for always returning the first entry in the rpc_xprt_switch */ 627 static 628 const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { 629 .xpi_rewind = xprt_iter_no_rewind, 630 .xpi_xprt = xprt_iter_first_entry, 631 .xpi_next = xprt_iter_first_entry, 632 }; 633 634 /* Policy for round-robin iteration of entries in the rpc_xprt_switch */ 635 static 636 const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { 637 .xpi_rewind = xprt_iter_default_rewind, 638 .xpi_xprt = xprt_iter_current_entry, 639 .xpi_next = xprt_iter_next_entry_roundrobin, 640 }; 641 642 /* Policy for once-through iteration of entries in the rpc_xprt_switch */ 643 static 644 const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { 645 .xpi_rewind = xprt_iter_default_rewind, 646 .xpi_xprt = xprt_iter_current_entry, 647 .xpi_next = xprt_iter_next_entry_all, 648 }; 649 650 static 651 const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { 652 .xpi_rewind = xprt_iter_default_rewind, 653 .xpi_xprt = xprt_iter_current_entry_offline, 654 .xpi_next = xprt_iter_next_entry_offline, 655 }; 656