xref: /openbmc/linux/net/sunrpc/xprtmultipath.c (revision a8f4fcdd8ba7d191c29ae87a2315906fe90368d6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Multipath support for RPC
4  *
5  * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved.
6  *
7  * Trond Myklebust <trond.myklebust@primarydata.com>
8  *
9  */
10 #include <linux/atomic.h>
11 #include <linux/types.h>
12 #include <linux/kref.h>
13 #include <linux/list.h>
14 #include <linux/rcupdate.h>
15 #include <linux/rculist.h>
16 #include <linux/slab.h>
17 #include <linux/spinlock.h>
18 #include <linux/sunrpc/xprt.h>
19 #include <linux/sunrpc/addr.h>
20 #include <linux/sunrpc/xprtmultipath.h>
21 
22 #include "sysfs.h"
23 
24 typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
25 		const struct rpc_xprt *cur);
26 
27 static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
28 static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
29 static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
30 
31 static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
32 		struct rpc_xprt *xprt)
33 {
34 	if (unlikely(xprt_get(xprt) == NULL))
35 		return;
36 	list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list);
37 	smp_wmb();
38 	if (xps->xps_nxprts == 0)
39 		xps->xps_net = xprt->xprt_net;
40 	xps->xps_nxprts++;
41 	xps->xps_nactive++;
42 }
43 
44 /**
45  * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch
46  * @xps: pointer to struct rpc_xprt_switch
47  * @xprt: pointer to struct rpc_xprt
48  *
49  * Adds xprt to the end of the list of struct rpc_xprt in xps.
50  */
51 void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
52 		struct rpc_xprt *xprt)
53 {
54 	if (xprt == NULL)
55 		return;
56 	spin_lock(&xps->xps_lock);
57 	if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
58 		xprt_switch_add_xprt_locked(xps, xprt);
59 	spin_unlock(&xps->xps_lock);
60 	rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL);
61 }
62 
63 static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
64 		struct rpc_xprt *xprt)
65 {
66 	if (unlikely(xprt == NULL))
67 		return;
68 	if (!test_bit(XPRT_OFFLINE, &xprt->state))
69 		xps->xps_nactive--;
70 	xps->xps_nxprts--;
71 	if (xps->xps_nxprts == 0)
72 		xps->xps_net = NULL;
73 	smp_wmb();
74 	list_del_rcu(&xprt->xprt_switch);
75 }
76 
77 /**
78  * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
79  * @xps: pointer to struct rpc_xprt_switch
80  * @xprt: pointer to struct rpc_xprt
81  *
82  * Removes xprt from the list of struct rpc_xprt in xps.
83  */
84 void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
85 		struct rpc_xprt *xprt)
86 {
87 	spin_lock(&xps->xps_lock);
88 	xprt_switch_remove_xprt_locked(xps, xprt);
89 	spin_unlock(&xps->xps_lock);
90 	xprt_put(xprt);
91 }
92 
93 static DEFINE_IDA(rpc_xprtswitch_ids);
94 
95 void xprt_multipath_cleanup_ids(void)
96 {
97 	ida_destroy(&rpc_xprtswitch_ids);
98 }
99 
100 static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
101 {
102 	int id;
103 
104 	id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags);
105 	if (id < 0)
106 		return id;
107 
108 	xps->xps_id = id;
109 	return 0;
110 }
111 
112 static void xprt_switch_free_id(struct rpc_xprt_switch *xps)
113 {
114 	ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id);
115 }
116 
117 /**
118  * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
119  * @xprt: pointer to struct rpc_xprt
120  * @gfp_flags: allocation flags
121  *
122  * On success, returns an initialised struct rpc_xprt_switch, containing
123  * the entry xprt. Returns NULL on failure.
124  */
125 struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
126 		gfp_t gfp_flags)
127 {
128 	struct rpc_xprt_switch *xps;
129 
130 	xps = kmalloc(sizeof(*xps), gfp_flags);
131 	if (xps != NULL) {
132 		spin_lock_init(&xps->xps_lock);
133 		kref_init(&xps->xps_kref);
134 		xprt_switch_alloc_id(xps, gfp_flags);
135 		xps->xps_nxprts = xps->xps_nactive = 0;
136 		atomic_long_set(&xps->xps_queuelen, 0);
137 		xps->xps_net = NULL;
138 		INIT_LIST_HEAD(&xps->xps_xprt_list);
139 		xps->xps_iter_ops = &rpc_xprt_iter_singular;
140 		rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags);
141 		xprt_switch_add_xprt_locked(xps, xprt);
142 		xps->xps_nunique_destaddr_xprts = 1;
143 		rpc_sysfs_xprt_setup(xps, xprt, gfp_flags);
144 	}
145 
146 	return xps;
147 }
148 
149 static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
150 {
151 	spin_lock(&xps->xps_lock);
152 	while (!list_empty(&xps->xps_xprt_list)) {
153 		struct rpc_xprt *xprt;
154 
155 		xprt = list_first_entry(&xps->xps_xprt_list,
156 				struct rpc_xprt, xprt_switch);
157 		xprt_switch_remove_xprt_locked(xps, xprt);
158 		spin_unlock(&xps->xps_lock);
159 		xprt_put(xprt);
160 		spin_lock(&xps->xps_lock);
161 	}
162 	spin_unlock(&xps->xps_lock);
163 }
164 
165 static void xprt_switch_free(struct kref *kref)
166 {
167 	struct rpc_xprt_switch *xps = container_of(kref,
168 			struct rpc_xprt_switch, xps_kref);
169 
170 	xprt_switch_free_entries(xps);
171 	rpc_sysfs_xprt_switch_destroy(xps);
172 	xprt_switch_free_id(xps);
173 	kfree_rcu(xps, xps_rcu);
174 }
175 
176 /**
177  * xprt_switch_get - Return a reference to a rpc_xprt_switch
178  * @xps: pointer to struct rpc_xprt_switch
179  *
180  * Returns a reference to xps unless the refcount is already zero.
181  */
182 struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps)
183 {
184 	if (xps != NULL && kref_get_unless_zero(&xps->xps_kref))
185 		return xps;
186 	return NULL;
187 }
188 
189 /**
190  * xprt_switch_put - Release a reference to a rpc_xprt_switch
191  * @xps: pointer to struct rpc_xprt_switch
192  *
193  * Release the reference to xps, and free it once the refcount is zero.
194  */
195 void xprt_switch_put(struct rpc_xprt_switch *xps)
196 {
197 	if (xps != NULL)
198 		kref_put(&xps->xps_kref, xprt_switch_free);
199 }
200 
201 /**
202  * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch
203  * @xps: pointer to struct rpc_xprt_switch
204  *
205  * Sets a round-robin default policy for iterators acting on xps.
206  */
207 void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps)
208 {
209 	if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin)
210 		WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin);
211 }
212 
213 static
214 const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi)
215 {
216 	if (xpi->xpi_ops != NULL)
217 		return xpi->xpi_ops;
218 	return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops;
219 }
220 
/* xpi_rewind hook that leaves the iterator untouched. */
static void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi)
{
	/* Intentionally empty. */
}
225 
226 static
227 void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
228 {
229 	WRITE_ONCE(xpi->xpi_cursor, NULL);
230 }
231 
232 static
233 bool xprt_is_active(const struct rpc_xprt *xprt)
234 {
235 	return (kref_read(&xprt->kref) != 0 &&
236 		!test_bit(XPRT_OFFLINE, &xprt->state));
237 }
238 
239 static
240 struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
241 {
242 	struct rpc_xprt *pos;
243 
244 	list_for_each_entry_rcu(pos, head, xprt_switch) {
245 		if (xprt_is_active(pos))
246 			return pos;
247 	}
248 	return NULL;
249 }
250 
251 static
252 struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
253 {
254 	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
255 
256 	if (xps == NULL)
257 		return NULL;
258 	return xprt_switch_find_first_entry(&xps->xps_xprt_list);
259 }
260 
261 static
262 struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
263 		const struct rpc_xprt *cur)
264 {
265 	struct rpc_xprt *pos;
266 	bool found = false;
267 
268 	list_for_each_entry_rcu(pos, head, xprt_switch) {
269 		if (cur == pos)
270 			found = true;
271 		if (found && xprt_is_active(pos))
272 			return pos;
273 	}
274 	return NULL;
275 }
276 
277 static
278 struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
279 {
280 	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
281 	struct list_head *head;
282 
283 	if (xps == NULL)
284 		return NULL;
285 	head = &xps->xps_xprt_list;
286 	if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
287 		return xprt_switch_find_first_entry(head);
288 	return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
289 }
290 
291 bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
292 			      const struct sockaddr *sap)
293 {
294 	struct list_head *head;
295 	struct rpc_xprt *pos;
296 
297 	if (xps == NULL || sap == NULL)
298 		return false;
299 
300 	head = &xps->xps_xprt_list;
301 	list_for_each_entry_rcu(pos, head, xprt_switch) {
302 		if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) {
303 			pr_info("RPC:   addr %s already in xprt switch\n",
304 				pos->address_strings[RPC_DISPLAY_ADDR]);
305 			return true;
306 		}
307 	}
308 	return false;
309 }
310 
311 static
312 struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
313 		const struct rpc_xprt *cur)
314 {
315 	struct rpc_xprt *pos, *prev = NULL;
316 	bool found = false;
317 
318 	list_for_each_entry_rcu(pos, head, xprt_switch) {
319 		if (cur == prev)
320 			found = true;
321 		if (found && xprt_is_active(pos))
322 			return pos;
323 		prev = pos;
324 	}
325 	return NULL;
326 }
327 
328 static
329 struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps,
330 		struct rpc_xprt **cursor,
331 		xprt_switch_find_xprt_t find_next)
332 {
333 	struct rpc_xprt *pos, *old;
334 
335 	old = smp_load_acquire(cursor);
336 	pos = find_next(xps, old);
337 	smp_store_release(cursor, pos);
338 	return pos;
339 }
340 
341 static
342 struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
343 		xprt_switch_find_xprt_t find_next)
344 {
345 	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
346 
347 	if (xps == NULL)
348 		return NULL;
349 	return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next);
350 }
351 
352 static
353 struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head,
354 		const struct rpc_xprt *cur)
355 {
356 	struct rpc_xprt *ret;
357 
358 	ret = xprt_switch_find_next_entry(head, cur);
359 	if (ret != NULL)
360 		return ret;
361 	return xprt_switch_find_first_entry(head);
362 }
363 
364 static
365 struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps,
366 		const struct rpc_xprt *cur)
367 {
368 	struct list_head *head = &xps->xps_xprt_list;
369 	struct rpc_xprt *xprt;
370 	unsigned int nactive;
371 
372 	for (;;) {
373 		unsigned long xprt_queuelen, xps_queuelen;
374 
375 		xprt = __xprt_switch_find_next_entry_roundrobin(head, cur);
376 		if (!xprt)
377 			break;
378 		xprt_queuelen = atomic_long_read(&xprt->queuelen);
379 		xps_queuelen = atomic_long_read(&xps->xps_queuelen);
380 		nactive = READ_ONCE(xps->xps_nactive);
381 		/* Exit loop if xprt_queuelen <= average queue length */
382 		if (xprt_queuelen * nactive <= xps_queuelen)
383 			break;
384 		cur = xprt;
385 	}
386 	return xprt;
387 }
388 
389 static
390 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
391 {
392 	return xprt_iter_next_entry_multiple(xpi,
393 			xprt_switch_find_next_entry_roundrobin);
394 }
395 
396 static
397 struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
398 		const struct rpc_xprt *cur)
399 {
400 	return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
401 }
402 
403 static
404 struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
405 {
406 	return xprt_iter_next_entry_multiple(xpi,
407 			xprt_switch_find_next_entry_all);
408 }
409 
410 /*
411  * xprt_iter_rewind - Resets the xprt iterator
412  * @xpi: pointer to rpc_xprt_iter
413  *
414  * Resets xpi to ensure that it points to the first entry in the list
415  * of transports.
416  */
417 static
418 void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
419 {
420 	rcu_read_lock();
421 	xprt_iter_ops(xpi)->xpi_rewind(xpi);
422 	rcu_read_unlock();
423 }
424 
425 static void __xprt_iter_init(struct rpc_xprt_iter *xpi,
426 		struct rpc_xprt_switch *xps,
427 		const struct rpc_xprt_iter_ops *ops)
428 {
429 	rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps));
430 	xpi->xpi_cursor = NULL;
431 	xpi->xpi_ops = ops;
432 }
433 
434 /**
435  * xprt_iter_init - Initialise an xprt iterator
436  * @xpi: pointer to rpc_xprt_iter
437  * @xps: pointer to rpc_xprt_switch
438  *
439  * Initialises the iterator to use the default iterator ops
440  * as set in xps. This function is mainly intended for internal
441  * use in the rpc_client.
442  */
443 void xprt_iter_init(struct rpc_xprt_iter *xpi,
444 		struct rpc_xprt_switch *xps)
445 {
446 	__xprt_iter_init(xpi, xps, NULL);
447 }
448 
449 /**
450  * xprt_iter_init_listall - Initialise an xprt iterator
451  * @xpi: pointer to rpc_xprt_iter
452  * @xps: pointer to rpc_xprt_switch
453  *
454  * Initialises the iterator to iterate once through the entire list
455  * of entries in xps.
456  */
457 void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
458 		struct rpc_xprt_switch *xps)
459 {
460 	__xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
461 }
462 
463 /**
464  * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
465  * @xpi: pointer to rpc_xprt_iter
466  * @newswitch: pointer to a new rpc_xprt_switch or NULL
467  *
468  * Swaps out the existing xpi->xpi_xpswitch with a new value.
469  */
470 struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
471 		struct rpc_xprt_switch *newswitch)
472 {
473 	struct rpc_xprt_switch __rcu *oldswitch;
474 
475 	/* Atomically swap out the old xpswitch */
476 	oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch));
477 	if (newswitch != NULL)
478 		xprt_iter_rewind(xpi);
479 	return rcu_dereference_protected(oldswitch, true);
480 }
481 
482 /**
483  * xprt_iter_destroy - Destroys the xprt iterator
484  * @xpi: pointer to rpc_xprt_iter
485  */
486 void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
487 {
488 	xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL));
489 }
490 
491 /**
492  * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor
493  * @xpi: pointer to rpc_xprt_iter
494  *
495  * Returns a pointer to the struct rpc_xprt that is currently
496  * pointed to by the cursor.
497  * Caller must be holding rcu_read_lock().
498  */
499 struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi)
500 {
501 	WARN_ON_ONCE(!rcu_read_lock_held());
502 	return xprt_iter_ops(xpi)->xpi_xprt(xpi);
503 }
504 
505 static
506 struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
507 		struct rpc_xprt *(*fn)(struct rpc_xprt_iter *))
508 {
509 	struct rpc_xprt *ret;
510 
511 	do {
512 		ret = fn(xpi);
513 		if (ret == NULL)
514 			break;
515 		ret = xprt_get(ret);
516 	} while (ret == NULL);
517 	return ret;
518 }
519 
520 /**
521  * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
522  * @xpi: pointer to rpc_xprt_iter
523  *
524  * Returns a reference to the struct rpc_xprt that is currently
525  * pointed to by the cursor.
526  */
527 struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
528 {
529 	struct rpc_xprt *xprt;
530 
531 	rcu_read_lock();
532 	xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
533 	rcu_read_unlock();
534 	return xprt;
535 }
536 
537 /**
538  * xprt_iter_get_next - Returns the next rpc_xprt following the cursor
539  * @xpi: pointer to rpc_xprt_iter
540  *
541  * Returns a reference to the struct rpc_xprt that immediately follows the
542  * entry pointed to by the cursor.
543  */
544 struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi)
545 {
546 	struct rpc_xprt *xprt;
547 
548 	rcu_read_lock();
549 	xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next);
550 	rcu_read_unlock();
551 	return xprt;
552 }
553 
554 /* Policy for always returning the first entry in the rpc_xprt_switch */
555 static
556 const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = {
557 	.xpi_rewind = xprt_iter_no_rewind,
558 	.xpi_xprt = xprt_iter_first_entry,
559 	.xpi_next = xprt_iter_first_entry,
560 };
561 
562 /* Policy for round-robin iteration of entries in the rpc_xprt_switch */
563 static
564 const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = {
565 	.xpi_rewind = xprt_iter_default_rewind,
566 	.xpi_xprt = xprt_iter_current_entry,
567 	.xpi_next = xprt_iter_next_entry_roundrobin,
568 };
569 
570 /* Policy for once-through iteration of entries in the rpc_xprt_switch */
571 static
572 const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
573 	.xpi_rewind = xprt_iter_default_rewind,
574 	.xpi_xprt = xprt_iter_current_entry,
575 	.xpi_next = xprt_iter_next_entry_all,
576 };
577