xref: /openbmc/linux/fs/afs/server.c (revision b7019ac5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13 
static unsigned int afs_server_gc_delay = 10;		/* Secs an unused server record lingers before GC */
static unsigned int afs_server_update_delay = 30;	/* Secs until a server's address list is rechecked */
16 
17 static void afs_inc_servers_outstanding(struct afs_net *net)
18 {
19 	atomic_inc(&net->servers_outstanding);
20 }
21 
22 static void afs_dec_servers_outstanding(struct afs_net *net)
23 {
24 	if (atomic_dec_and_test(&net->servers_outstanding))
25 		wake_up_var(&net->servers_outstanding);
26 }
27 
28 /*
29  * Find a server by one of its addresses.
30  */
31 struct afs_server *afs_find_server(struct afs_net *net,
32 				   const struct sockaddr_rxrpc *srx)
33 {
34 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
35 	const struct afs_addr_list *alist;
36 	struct afs_server *server = NULL;
37 	unsigned int i;
38 	bool ipv6 = true;
39 	int seq = 0, diff;
40 
41 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
42 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
43 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
44 		ipv6 = false;
45 
46 	rcu_read_lock();
47 
48 	do {
49 		if (server)
50 			afs_put_server(net, server);
51 		server = NULL;
52 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
53 
54 		if (ipv6) {
55 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
56 				alist = rcu_dereference(server->addresses);
57 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
58 					b = &alist->addrs[i].transport.sin6;
59 					diff = ((u16 __force)a->sin6_port -
60 						(u16 __force)b->sin6_port);
61 					if (diff == 0)
62 						diff = memcmp(&a->sin6_addr,
63 							      &b->sin6_addr,
64 							      sizeof(struct in6_addr));
65 					if (diff == 0)
66 						goto found;
67 				}
68 			}
69 		} else {
70 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
71 				alist = rcu_dereference(server->addresses);
72 				for (i = 0; i < alist->nr_ipv4; i++) {
73 					b = &alist->addrs[i].transport.sin6;
74 					diff = ((u16 __force)a->sin6_port -
75 						(u16 __force)b->sin6_port);
76 					if (diff == 0)
77 						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
78 							(u32 __force)b->sin6_addr.s6_addr32[3]);
79 					if (diff == 0)
80 						goto found;
81 				}
82 			}
83 		}
84 
85 		server = NULL;
86 	found:
87 		if (server && !atomic_inc_not_zero(&server->usage))
88 			server = NULL;
89 
90 	} while (need_seqretry(&net->fs_addr_lock, seq));
91 
92 	done_seqretry(&net->fs_addr_lock, seq);
93 
94 	rcu_read_unlock();
95 	return server;
96 }
97 
98 /*
99  * Look up a server by its UUID
100  */
101 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
102 {
103 	struct afs_server *server = NULL;
104 	struct rb_node *p;
105 	int diff, seq = 0;
106 
107 	_enter("%pU", uuid);
108 
109 	do {
110 		/* Unfortunately, rbtree walking doesn't give reliable results
111 		 * under just the RCU read lock, so we have to check for
112 		 * changes.
113 		 */
114 		if (server)
115 			afs_put_server(net, server);
116 		server = NULL;
117 
118 		read_seqbegin_or_lock(&net->fs_lock, &seq);
119 
120 		p = net->fs_servers.rb_node;
121 		while (p) {
122 			server = rb_entry(p, struct afs_server, uuid_rb);
123 
124 			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
125 			if (diff < 0) {
126 				p = p->rb_left;
127 			} else if (diff > 0) {
128 				p = p->rb_right;
129 			} else {
130 				afs_get_server(server);
131 				break;
132 			}
133 
134 			server = NULL;
135 		}
136 	} while (need_seqretry(&net->fs_lock, seq));
137 
138 	done_seqretry(&net->fs_lock, seq);
139 
140 	_leave(" = %p", server);
141 	return server;
142 }
143 
144 /*
145  * Install a server record in the namespace tree
146  */
147 static struct afs_server *afs_install_server(struct afs_net *net,
148 					     struct afs_server *candidate)
149 {
150 	const struct afs_addr_list *alist;
151 	struct afs_server *server;
152 	struct rb_node **pp, *p;
153 	int ret = -EEXIST, diff;
154 
155 	_enter("%p", candidate);
156 
157 	write_seqlock(&net->fs_lock);
158 
159 	/* Firstly install the server in the UUID lookup tree */
160 	pp = &net->fs_servers.rb_node;
161 	p = NULL;
162 	while (*pp) {
163 		p = *pp;
164 		_debug("- consider %p", p);
165 		server = rb_entry(p, struct afs_server, uuid_rb);
166 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
167 		if (diff < 0)
168 			pp = &(*pp)->rb_left;
169 		else if (diff > 0)
170 			pp = &(*pp)->rb_right;
171 		else
172 			goto exists;
173 	}
174 
175 	server = candidate;
176 	rb_link_node(&server->uuid_rb, p, pp);
177 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
178 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
179 
180 	write_seqlock(&net->fs_addr_lock);
181 	alist = rcu_dereference_protected(server->addresses,
182 					  lockdep_is_held(&net->fs_addr_lock.lock));
183 
184 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
185 	 * it in the IPv4 and/or IPv6 reverse-map lists.
186 	 *
187 	 * TODO: For speed we want to use something other than a flat list
188 	 * here; even sorting the list in terms of lowest address would help a
189 	 * bit, but anything we might want to do gets messy and memory
190 	 * intensive.
191 	 */
192 	if (alist->nr_ipv4 > 0)
193 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
194 	if (alist->nr_addrs > alist->nr_ipv4)
195 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
196 
197 	write_sequnlock(&net->fs_addr_lock);
198 	ret = 0;
199 
200 exists:
201 	afs_get_server(server);
202 	write_sequnlock(&net->fs_lock);
203 	return server;
204 }
205 
206 /*
207  * allocate a new server record
208  */
209 static struct afs_server *afs_alloc_server(struct afs_net *net,
210 					   const uuid_t *uuid,
211 					   struct afs_addr_list *alist)
212 {
213 	struct afs_server *server;
214 
215 	_enter("");
216 
217 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
218 	if (!server)
219 		goto enomem;
220 
221 	atomic_set(&server->usage, 1);
222 	RCU_INIT_POINTER(server->addresses, alist);
223 	server->addr_version = alist->version;
224 	server->uuid = *uuid;
225 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
226 	rwlock_init(&server->fs_lock);
227 	INIT_HLIST_HEAD(&server->cb_volumes);
228 	rwlock_init(&server->cb_break_lock);
229 	init_waitqueue_head(&server->probe_wq);
230 	spin_lock_init(&server->probe_lock);
231 
232 	afs_inc_servers_outstanding(net);
233 	_leave(" = %p", server);
234 	return server;
235 
236 enomem:
237 	_leave(" = NULL [nomem]");
238 	return NULL;
239 }
240 
241 /*
242  * Look up an address record for a server
243  */
244 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
245 						 struct key *key, const uuid_t *uuid)
246 {
247 	struct afs_vl_cursor vc;
248 	struct afs_addr_list *alist = NULL;
249 	int ret;
250 
251 	ret = -ERESTARTSYS;
252 	if (afs_begin_vlserver_operation(&vc, cell, key)) {
253 		while (afs_select_vlserver(&vc)) {
254 			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
255 				alist = afs_yfsvl_get_endpoints(&vc, uuid);
256 			else
257 				alist = afs_vl_get_addrs_u(&vc, uuid);
258 		}
259 
260 		ret = afs_end_vlserver_operation(&vc);
261 	}
262 
263 	return ret < 0 ? ERR_PTR(ret) : alist;
264 }
265 
266 /*
267  * Get or create a fileserver record.
268  */
269 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
270 				     const uuid_t *uuid)
271 {
272 	struct afs_addr_list *alist;
273 	struct afs_server *server, *candidate;
274 
275 	_enter("%p,%pU", cell->net, uuid);
276 
277 	server = afs_find_server_by_uuid(cell->net, uuid);
278 	if (server)
279 		return server;
280 
281 	alist = afs_vl_lookup_addrs(cell, key, uuid);
282 	if (IS_ERR(alist))
283 		return ERR_CAST(alist);
284 
285 	candidate = afs_alloc_server(cell->net, uuid, alist);
286 	if (!candidate) {
287 		afs_put_addrlist(alist);
288 		return ERR_PTR(-ENOMEM);
289 	}
290 
291 	server = afs_install_server(cell->net, candidate);
292 	if (server != candidate) {
293 		afs_put_addrlist(alist);
294 		kfree(candidate);
295 	}
296 
297 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
298 	return server;
299 }
300 
301 /*
302  * Set the server timer to fire after a given delay, assuming it's not already
303  * set for an earlier time.
304  */
305 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
306 {
307 	if (net->live) {
308 		afs_inc_servers_outstanding(net);
309 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
310 			afs_dec_servers_outstanding(net);
311 	}
312 }
313 
314 /*
315  * Server management timer.  We have an increment on fs_outstanding that we
316  * need to pass along to the work item.
317  */
318 void afs_servers_timer(struct timer_list *timer)
319 {
320 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
321 
322 	_enter("");
323 	if (!queue_work(afs_wq, &net->fs_manager))
324 		afs_dec_servers_outstanding(net);
325 }
326 
327 /*
328  * Release a reference on a server record.
329  */
330 void afs_put_server(struct afs_net *net, struct afs_server *server)
331 {
332 	unsigned int usage;
333 
334 	if (!server)
335 		return;
336 
337 	server->put_time = ktime_get_real_seconds();
338 
339 	usage = atomic_dec_return(&server->usage);
340 
341 	_enter("{%u}", usage);
342 
343 	if (likely(usage > 0))
344 		return;
345 
346 	afs_set_server_timer(net, afs_server_gc_delay);
347 }
348 
349 static void afs_server_rcu(struct rcu_head *rcu)
350 {
351 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
352 
353 	afs_put_addrlist(rcu_access_pointer(server->addresses));
354 	kfree(server);
355 }
356 
357 /*
358  * destroy a dead server
359  */
360 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
361 {
362 	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
363 	struct afs_addr_cursor ac = {
364 		.alist	= alist,
365 		.index	= alist->preferred,
366 		.error	= 0,
367 	};
368 	_enter("%p", server);
369 
370 	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
371 		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
372 
373 	wait_var_event(&server->probe_outstanding,
374 		       atomic_read(&server->probe_outstanding) == 0);
375 
376 	call_rcu(&server->rcu, afs_server_rcu);
377 	afs_dec_servers_outstanding(net);
378 }
379 
380 /*
381  * Garbage collect any expired servers.
382  */
383 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
384 {
385 	struct afs_server *server;
386 	bool deleted;
387 	int usage;
388 
389 	while ((server = gc_list)) {
390 		gc_list = server->gc_next;
391 
392 		write_seqlock(&net->fs_lock);
393 		usage = 1;
394 		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
395 		if (deleted) {
396 			rb_erase(&server->uuid_rb, &net->fs_servers);
397 			hlist_del_rcu(&server->proc_link);
398 		}
399 		write_sequnlock(&net->fs_lock);
400 
401 		if (deleted) {
402 			write_seqlock(&net->fs_addr_lock);
403 			if (!hlist_unhashed(&server->addr4_link))
404 				hlist_del_rcu(&server->addr4_link);
405 			if (!hlist_unhashed(&server->addr6_link))
406 				hlist_del_rcu(&server->addr6_link);
407 			write_sequnlock(&net->fs_addr_lock);
408 			afs_destroy_server(net, server);
409 		}
410 	}
411 }
412 
413 /*
414  * Manage the records of servers known to be within a network namespace.  This
415  * includes garbage collecting unused servers.
416  *
417  * Note also that we were given an increment on net->servers_outstanding by
418  * whoever queued us that we need to deal with before returning.
419  */
420 void afs_manage_servers(struct work_struct *work)
421 {
422 	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
423 	struct afs_server *gc_list = NULL;
424 	struct rb_node *cursor;
425 	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
426 	bool purging = !net->live;
427 
428 	_enter("");
429 
430 	/* Trawl the server list looking for servers that have expired from
431 	 * lack of use.
432 	 */
433 	read_seqlock_excl(&net->fs_lock);
434 
435 	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
436 		struct afs_server *server =
437 			rb_entry(cursor, struct afs_server, uuid_rb);
438 		int usage = atomic_read(&server->usage);
439 
440 		_debug("manage %pU %u", &server->uuid, usage);
441 
442 		ASSERTCMP(usage, >=, 1);
443 		ASSERTIFCMP(purging, usage, ==, 1);
444 
445 		if (usage == 1) {
446 			time64_t expire_at = server->put_time;
447 
448 			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
449 			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
450 				expire_at += afs_server_gc_delay;
451 			if (purging || expire_at <= now) {
452 				server->gc_next = gc_list;
453 				gc_list = server;
454 			} else if (expire_at < next_manage) {
455 				next_manage = expire_at;
456 			}
457 		}
458 	}
459 
460 	read_sequnlock_excl(&net->fs_lock);
461 
462 	/* Update the timer on the way out.  We have to pass an increment on
463 	 * servers_outstanding in the namespace that we are in to the timer or
464 	 * the work scheduler.
465 	 */
466 	if (!purging && next_manage < TIME64_MAX) {
467 		now = ktime_get_real_seconds();
468 
469 		if (next_manage - now <= 0) {
470 			if (queue_work(afs_wq, &net->fs_manager))
471 				afs_inc_servers_outstanding(net);
472 		} else {
473 			afs_set_server_timer(net, next_manage - now);
474 		}
475 	}
476 
477 	afs_gc_servers(net, gc_list);
478 
479 	afs_dec_servers_outstanding(net);
480 	_leave(" [%d]", atomic_read(&net->servers_outstanding));
481 }
482 
483 static void afs_queue_server_manager(struct afs_net *net)
484 {
485 	afs_inc_servers_outstanding(net);
486 	if (!queue_work(afs_wq, &net->fs_manager))
487 		afs_dec_servers_outstanding(net);
488 }
489 
490 /*
491  * Purge list of servers.
492  */
493 void afs_purge_servers(struct afs_net *net)
494 {
495 	_enter("");
496 
497 	if (del_timer_sync(&net->fs_timer))
498 		atomic_dec(&net->servers_outstanding);
499 
500 	afs_queue_server_manager(net);
501 
502 	_debug("wait");
503 	wait_var_event(&net->servers_outstanding,
504 		       !atomic_read(&net->servers_outstanding));
505 	_leave("");
506 }
507 
508 /*
509  * Get an update for a server's address list.
510  */
511 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
512 {
513 	struct afs_addr_list *alist, *discard;
514 
515 	_enter("");
516 
517 	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
518 				    &server->uuid);
519 	if (IS_ERR(alist)) {
520 		if ((PTR_ERR(alist) == -ERESTARTSYS ||
521 		     PTR_ERR(alist) == -EINTR) &&
522 		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
523 		    server->addresses) {
524 			_leave(" = t [intr]");
525 			return true;
526 		}
527 		fc->error = PTR_ERR(alist);
528 		_leave(" = f [%d]", fc->error);
529 		return false;
530 	}
531 
532 	discard = alist;
533 	if (server->addr_version != alist->version) {
534 		write_lock(&server->fs_lock);
535 		discard = rcu_dereference_protected(server->addresses,
536 						    lockdep_is_held(&server->fs_lock));
537 		rcu_assign_pointer(server->addresses, alist);
538 		server->addr_version = alist->version;
539 		write_unlock(&server->fs_lock);
540 	}
541 
542 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
543 	afs_put_addrlist(discard);
544 	_leave(" = t");
545 	return true;
546 }
547 
548 /*
549  * See if a server's address list needs updating.
550  */
551 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
552 {
553 	time64_t now = ktime_get_real_seconds();
554 	long diff;
555 	bool success;
556 	int ret, retries = 0;
557 
558 	_enter("");
559 
560 	ASSERT(server);
561 
562 retry:
563 	diff = READ_ONCE(server->update_at) - now;
564 	if (diff > 0) {
565 		_leave(" = t [not now %ld]", diff);
566 		return true;
567 	}
568 
569 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
570 		success = afs_update_server_record(fc, server);
571 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
572 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
573 		_leave(" = %d", success);
574 		return success;
575 	}
576 
577 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
578 			  TASK_INTERRUPTIBLE);
579 	if (ret == -ERESTARTSYS) {
580 		if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
581 			_leave(" = t [intr]");
582 			return true;
583 		}
584 		fc->error = ret;
585 		_leave(" = f [intr]");
586 		return false;
587 	}
588 
589 	retries++;
590 	if (retries == 4) {
591 		_leave(" = f [stale]");
592 		ret = -ESTALE;
593 		return false;
594 	}
595 	goto retry;
596 }
597