1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Multipath support for RPC 4 * 5 * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. 6 * 7 * Trond Myklebust <trond.myklebust@primarydata.com> 8 * 9 */ 10 #include <linux/atomic.h> 11 #include <linux/types.h> 12 #include <linux/kref.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/rculist.h> 16 #include <linux/slab.h> 17 #include <linux/spinlock.h> 18 #include <linux/sunrpc/xprt.h> 19 #include <linux/sunrpc/addr.h> 20 #include <linux/sunrpc/xprtmultipath.h> 21 22 #include "sysfs.h" 23 24 typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, 25 const struct rpc_xprt *cur); 26 27 static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; 28 static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; 29 static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; 30 static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; 31 32 static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, 33 struct rpc_xprt *xprt) 34 { 35 if (unlikely(xprt_get(xprt) == NULL)) 36 return; 37 list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); 38 smp_wmb(); 39 if (xps->xps_nxprts == 0) 40 xps->xps_net = xprt->xprt_net; 41 xps->xps_nxprts++; 42 xps->xps_nactive++; 43 } 44 45 /** 46 * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch 47 * @xps: pointer to struct rpc_xprt_switch 48 * @xprt: pointer to struct rpc_xprt 49 * 50 * Adds xprt to the end of the list of struct rpc_xprt in xps. 51 */ 52 void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, 53 struct rpc_xprt *xprt) 54 { 55 if (xprt == NULL) 56 return; 57 spin_lock(&xps->xps_lock); 58 if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) 59 xprt_switch_add_xprt_locked(xps, xprt); 60 spin_unlock(&xps->xps_lock); 61 rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); 62 } 63 64 static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, 65 struct rpc_xprt *xprt, bool offline) 66 { 67 if (unlikely(xprt == NULL)) 68 return; 69 if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline) 70 xps->xps_nactive--; 71 xps->xps_nxprts--; 72 if (xps->xps_nxprts == 0) 73 xps->xps_net = NULL; 74 smp_wmb(); 75 list_del_rcu(&xprt->xprt_switch); 76 } 77 78 /** 79 * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch 80 * @xps: pointer to struct rpc_xprt_switch 81 * @xprt: pointer to struct rpc_xprt 82 * @offline: indicates if the xprt that's being removed is in an offline state 83 * 84 * Removes xprt from the list of struct rpc_xprt in xps. 85 */ 86 void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, 87 struct rpc_xprt *xprt, bool offline) 88 { 89 spin_lock(&xps->xps_lock); 90 xprt_switch_remove_xprt_locked(xps, xprt, offline); 91 spin_unlock(&xps->xps_lock); 92 xprt_put(xprt); 93 } 94 95 static DEFINE_IDA(rpc_xprtswitch_ids); 96 97 void xprt_multipath_cleanup_ids(void) 98 { 99 ida_destroy(&rpc_xprtswitch_ids); 100 } 101 102 static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) 103 { 104 int id; 105 106 id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); 107 if (id < 0) 108 return id; 109 110 xps->xps_id = id; 111 return 0; 112 } 113 114 static void xprt_switch_free_id(struct rpc_xprt_switch *xps) 115 { 116 ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); 117 } 118 119 /** 120 * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch 121 * @xprt: pointer to struct rpc_xprt 122 * @gfp_flags: allocation flags 123 * 124 * On success, returns an initialised struct rpc_xprt_switch, containing 125 * the entry xprt. Returns NULL on failure. 126 */ 127 struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, 128 gfp_t gfp_flags) 129 { 130 struct rpc_xprt_switch *xps; 131 132 xps = kmalloc(sizeof(*xps), gfp_flags); 133 if (xps != NULL) { 134 spin_lock_init(&xps->xps_lock); 135 kref_init(&xps->xps_kref); 136 xprt_switch_alloc_id(xps, gfp_flags); 137 xps->xps_nxprts = xps->xps_nactive = 0; 138 atomic_long_set(&xps->xps_queuelen, 0); 139 xps->xps_net = NULL; 140 INIT_LIST_HEAD(&xps->xps_xprt_list); 141 xps->xps_iter_ops = &rpc_xprt_iter_singular; 142 rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); 143 xprt_switch_add_xprt_locked(xps, xprt); 144 xps->xps_nunique_destaddr_xprts = 1; 145 rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); 146 } 147 148 return xps; 149 } 150 151 static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) 152 { 153 spin_lock(&xps->xps_lock); 154 while (!list_empty(&xps->xps_xprt_list)) { 155 struct rpc_xprt *xprt; 156 157 xprt = list_first_entry(&xps->xps_xprt_list, 158 struct rpc_xprt, xprt_switch); 159 xprt_switch_remove_xprt_locked(xps, xprt, true); 160 spin_unlock(&xps->xps_lock); 161 xprt_put(xprt); 162 spin_lock(&xps->xps_lock); 163 } 164 spin_unlock(&xps->xps_lock); 165 } 166 167 static void xprt_switch_free(struct kref *kref) 168 { 169 struct rpc_xprt_switch *xps = container_of(kref, 170 struct rpc_xprt_switch, xps_kref); 171 172 xprt_switch_free_entries(xps); 173 rpc_sysfs_xprt_switch_destroy(xps); 174 xprt_switch_free_id(xps); 175 kfree_rcu(xps, xps_rcu); 176 } 177 178 /** 179 * xprt_switch_get - Return a reference to a rpc_xprt_switch 180 * @xps: pointer to struct rpc_xprt_switch 181 * 182 * Returns a reference to xps unless the refcount is already zero. 183 */ 184 struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) 185 { 186 if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) 187 return xps; 188 return NULL; 189 } 190 191 /** 192 * xprt_switch_put - Release a reference to a rpc_xprt_switch 193 * @xps: pointer to struct rpc_xprt_switch 194 * 195 * Release the reference to xps, and free it once the refcount is zero. 196 */ 197 void xprt_switch_put(struct rpc_xprt_switch *xps) 198 { 199 if (xps != NULL) 200 kref_put(&xps->xps_kref, xprt_switch_free); 201 } 202 203 /** 204 * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch 205 * @xps: pointer to struct rpc_xprt_switch 206 * 207 * Sets a round-robin default policy for iterators acting on xps. 208 */ 209 void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) 210 { 211 if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) 212 WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); 213 } 214 215 static 216 const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) 217 { 218 if (xpi->xpi_ops != NULL) 219 return xpi->xpi_ops; 220 return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; 221 } 222 223 static 224 void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) 225 { 226 } 227 228 static 229 void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) 230 { 231 WRITE_ONCE(xpi->xpi_cursor, NULL); 232 } 233 234 static 235 bool xprt_is_active(const struct rpc_xprt *xprt) 236 { 237 return (kref_read(&xprt->kref) != 0 && 238 !test_bit(XPRT_OFFLINE, &xprt->state)); 239 } 240 241 static 242 struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) 243 { 244 struct rpc_xprt *pos; 245 246 list_for_each_entry_rcu(pos, head, xprt_switch) { 247 if (xprt_is_active(pos)) 248 return pos; 249 } 250 return NULL; 251 } 252 253 static 254 struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) 255 { 256 struct rpc_xprt *pos; 257 258 list_for_each_entry_rcu(pos, head, xprt_switch) { 259 if (!xprt_is_active(pos)) 260 return pos; 261 } 262 return NULL; 263 } 264 265 static 266 struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) 267 { 268 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 269 270 if (xps == NULL) 271 return NULL; 272 return xprt_switch_find_first_entry(&xps->xps_xprt_list); 273 } 274 275 static 276 struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, 277 const struct rpc_xprt *cur, 278 bool find_active) 279 { 280 struct rpc_xprt *pos; 281 bool found = false; 282 283 list_for_each_entry_rcu(pos, head, xprt_switch) { 284 if (cur == pos) 285 found = true; 286 if (found && ((find_active && xprt_is_active(pos)) || 287 (!find_active && xprt_is_active(pos)))) 288 return pos; 289 } 290 return NULL; 291 } 292 293 static 294 struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, 295 const struct rpc_xprt *cur) 296 { 297 return _xprt_switch_find_current_entry(head, cur, true); 298 } 299 300 static 301 struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, 302 struct rpc_xprt *first_entry(struct list_head *head), 303 struct rpc_xprt *current_entry(struct list_head *head, 304 const struct rpc_xprt *cur)) 305 { 306 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 307 struct list_head *head; 308 309 if (xps == NULL) 310 return NULL; 311 head = &xps->xps_xprt_list; 312 if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) 313 return first_entry(head); 314 return current_entry(head, xpi->xpi_cursor); 315 } 316 317 static 318 struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) 319 { 320 return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, 321 xprt_switch_find_current_entry); 322 } 323 324 static 325 struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, 326 const struct rpc_xprt *cur) 327 { 328 return _xprt_switch_find_current_entry(head, cur, false); 329 } 330 331 static 332 struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) 333 { 334 return _xprt_iter_current_entry(xpi, 335 xprt_switch_find_first_entry_offline, 336 xprt_switch_find_current_entry_offline); 337 } 338 339 bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, 340 const struct sockaddr *sap) 341 { 342 struct list_head *head; 343 struct rpc_xprt *pos; 344 345 if (xps == NULL || sap == NULL) 346 return false; 347 348 head = &xps->xps_xprt_list; 349 list_for_each_entry_rcu(pos, head, xprt_switch) { 350 if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { 351 pr_info("RPC: addr %s already in xprt switch\n", 352 pos->address_strings[RPC_DISPLAY_ADDR]); 353 return true; 354 } 355 } 356 return false; 357 } 358 359 static 360 struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, 361 const struct rpc_xprt *cur, bool check_active) 362 { 363 struct rpc_xprt *pos, *prev = NULL; 364 bool found = false; 365 366 list_for_each_entry_rcu(pos, head, xprt_switch) { 367 if (cur == prev) 368 found = true; 369 /* for request to return active transports return only 370 * active, for request to return offline transports 371 * return only offline 372 */ 373 if (found && ((check_active && xprt_is_active(pos)) || 374 (!check_active && !xprt_is_active(pos)))) 375 return pos; 376 prev = pos; 377 } 378 return NULL; 379 } 380 381 static 382 struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, 383 struct rpc_xprt **cursor, 384 xprt_switch_find_xprt_t find_next) 385 { 386 struct rpc_xprt *pos, *old; 387 388 old = smp_load_acquire(cursor); 389 pos = find_next(xps, old); 390 smp_store_release(cursor, pos); 391 return pos; 392 } 393 394 static 395 struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, 396 xprt_switch_find_xprt_t find_next) 397 { 398 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 399 400 if (xps == NULL) 401 return NULL; 402 return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next); 403 } 404 405 static 406 struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, 407 const struct rpc_xprt *cur) 408 { 409 struct rpc_xprt *ret; 410 411 ret = xprt_switch_find_next_entry(head, cur, true); 412 if (ret != NULL) 413 return ret; 414 return xprt_switch_find_first_entry(head); 415 } 416 417 static 418 struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, 419 const struct rpc_xprt *cur) 420 { 421 struct list_head *head = &xps->xps_xprt_list; 422 struct rpc_xprt *xprt; 423 unsigned int nactive; 424 425 for (;;) { 426 unsigned long xprt_queuelen, xps_queuelen; 427 428 xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); 429 if (!xprt) 430 break; 431 xprt_queuelen = atomic_long_read(&xprt->queuelen); 432 xps_queuelen = atomic_long_read(&xps->xps_queuelen); 433 nactive = READ_ONCE(xps->xps_nactive); 434 /* Exit loop if xprt_queuelen <= average queue length */ 435 if (xprt_queuelen * nactive <= xps_queuelen) 436 break; 437 cur = xprt; 438 } 439 return xprt; 440 } 441 442 static 443 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) 444 { 445 return xprt_iter_next_entry_multiple(xpi, 446 xprt_switch_find_next_entry_roundrobin); 447 } 448 449 static 450 struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, 451 const struct rpc_xprt *cur) 452 { 453 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); 454 } 455 456 static 457 struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, 458 const struct rpc_xprt *cur) 459 { 460 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); 461 } 462 463 static 464 struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) 465 { 466 return xprt_iter_next_entry_multiple(xpi, 467 xprt_switch_find_next_entry_all); 468 } 469 470 static 471 struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) 472 { 473 return xprt_iter_next_entry_multiple(xpi, 474 xprt_switch_find_next_entry_offline); 475 } 476 477 /* 478 * xprt_iter_rewind - Resets the xprt iterator 479 * @xpi: pointer to rpc_xprt_iter 480 * 481 * Resets xpi to ensure that it points to the first entry in the list 482 * of transports. 483 */ 484 void xprt_iter_rewind(struct rpc_xprt_iter *xpi) 485 { 486 rcu_read_lock(); 487 xprt_iter_ops(xpi)->xpi_rewind(xpi); 488 rcu_read_unlock(); 489 } 490 491 static void __xprt_iter_init(struct rpc_xprt_iter *xpi, 492 struct rpc_xprt_switch *xps, 493 const struct rpc_xprt_iter_ops *ops) 494 { 495 rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); 496 xpi->xpi_cursor = NULL; 497 xpi->xpi_ops = ops; 498 } 499 500 /** 501 * xprt_iter_init - Initialise an xprt iterator 502 * @xpi: pointer to rpc_xprt_iter 503 * @xps: pointer to rpc_xprt_switch 504 * 505 * Initialises the iterator to use the default iterator ops 506 * as set in xps. This function is mainly intended for internal 507 * use in the rpc_client. 508 */ 509 void xprt_iter_init(struct rpc_xprt_iter *xpi, 510 struct rpc_xprt_switch *xps) 511 { 512 __xprt_iter_init(xpi, xps, NULL); 513 } 514 515 /** 516 * xprt_iter_init_listall - Initialise an xprt iterator 517 * @xpi: pointer to rpc_xprt_iter 518 * @xps: pointer to rpc_xprt_switch 519 * 520 * Initialises the iterator to iterate once through the entire list 521 * of entries in xps. 522 */ 523 void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, 524 struct rpc_xprt_switch *xps) 525 { 526 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); 527 } 528 529 void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, 530 struct rpc_xprt_switch *xps) 531 { 532 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); 533 } 534 535 /** 536 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch 537 * @xpi: pointer to rpc_xprt_iter 538 * @newswitch: pointer to a new rpc_xprt_switch or NULL 539 * 540 * Swaps out the existing xpi->xpi_xpswitch with a new value. 541 */ 542 struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, 543 struct rpc_xprt_switch *newswitch) 544 { 545 struct rpc_xprt_switch __rcu *oldswitch; 546 547 /* Atomically swap out the old xpswitch */ 548 oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); 549 if (newswitch != NULL) 550 xprt_iter_rewind(xpi); 551 return rcu_dereference_protected(oldswitch, true); 552 } 553 554 /** 555 * xprt_iter_destroy - Destroys the xprt iterator 556 * @xpi: pointer to rpc_xprt_iter 557 */ 558 void xprt_iter_destroy(struct rpc_xprt_iter *xpi) 559 { 560 xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); 561 } 562 563 /** 564 * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor 565 * @xpi: pointer to rpc_xprt_iter 566 * 567 * Returns a pointer to the struct rpc_xprt that is currently 568 * pointed to by the cursor. 569 * Caller must be holding rcu_read_lock(). 570 */ 571 struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) 572 { 573 WARN_ON_ONCE(!rcu_read_lock_held()); 574 return xprt_iter_ops(xpi)->xpi_xprt(xpi); 575 } 576 577 static 578 struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, 579 struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) 580 { 581 struct rpc_xprt *ret; 582 583 do { 584 ret = fn(xpi); 585 if (ret == NULL) 586 break; 587 ret = xprt_get(ret); 588 } while (ret == NULL); 589 return ret; 590 } 591 592 /** 593 * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor 594 * @xpi: pointer to rpc_xprt_iter 595 * 596 * Returns a reference to the struct rpc_xprt that is currently 597 * pointed to by the cursor. 598 */ 599 struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi) 600 { 601 struct rpc_xprt *xprt; 602 603 rcu_read_lock(); 604 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt); 605 rcu_read_unlock(); 606 return xprt; 607 } 608 609 /** 610 * xprt_iter_get_next - Returns the next rpc_xprt following the cursor 611 * @xpi: pointer to rpc_xprt_iter 612 * 613 * Returns a reference to the struct rpc_xprt that immediately follows the 614 * entry pointed to by the cursor. 615 */ 616 struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) 617 { 618 struct rpc_xprt *xprt; 619 620 rcu_read_lock(); 621 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); 622 rcu_read_unlock(); 623 return xprt; 624 } 625 626 /* Policy for always returning the first entry in the rpc_xprt_switch */ 627 static 628 const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { 629 .xpi_rewind = xprt_iter_no_rewind, 630 .xpi_xprt = xprt_iter_first_entry, 631 .xpi_next = xprt_iter_first_entry, 632 }; 633 634 /* Policy for round-robin iteration of entries in the rpc_xprt_switch */ 635 static 636 const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { 637 .xpi_rewind = xprt_iter_default_rewind, 638 .xpi_xprt = xprt_iter_current_entry, 639 .xpi_next = xprt_iter_next_entry_roundrobin, 640 }; 641 642 /* Policy for once-through iteration of entries in the rpc_xprt_switch */ 643 static 644 const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { 645 .xpi_rewind = xprt_iter_default_rewind, 646 .xpi_xprt = xprt_iter_current_entry, 647 .xpi_next = xprt_iter_next_entry_all, 648 }; 649 650 static 651 const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { 652 .xpi_rewind = xprt_iter_default_rewind, 653 .xpi_xprt = xprt_iter_current_entry_offline, 654 .xpi_next = xprt_iter_next_entry_offline, 655 }; 656