xref: /openbmc/linux/fs/afs/server.c (revision d203c2d3)
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
static atomic_t afs_server_debug_id;

static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
	if (atomic_dec_and_test(&net->servers_outstanding))
		wake_up_var(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
				   const struct sockaddr_rxrpc *srx)
{
	const struct afs_addr_list *alist;
	struct afs_server *server = NULL;
	unsigned int i;
	int seq = 0, diff;

	rcu_read_lock();

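	/* Search whilst holding the RCU read lock; if the address lists
	 * change under us, the seqlock will send us back round the loop.
	 */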
	do {
		if (server)
			afs_put_server(net, server, afs_server_trace_put_find_rsq);
		server = NULL;
		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

		if (srx->transport.family == AF_INET6) {
			const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
				alist = rcu_dereference(server->addresses);
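				/* IPv6 entries are stored after the IPv4
				 * ones in the address list, hence starting
				 * the scan at nr_ipv4.
				 */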
				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = ((u16 __force)a->sin6_port -
						(u16 __force)b->sin6_port);
					if (diff == 0)
						diff = memcmp(&a->sin6_addr,
							      &b->sin6_addr,
							      sizeof(struct in6_addr));
					if (diff == 0)
						goto found;
				}
			}
		} else {
			const struct sockaddr_in *a = &srx->transport.sin, *b;
			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
				alist = rcu_dereference(server->addresses);
				for (i = 0; i < alist->nr_ipv4; i++) {
					b = &alist->addrs[i].transport.sin;
					diff = ((u16 __force)a->sin_port -
						(u16 __force)b->sin_port);
					if (diff == 0)
						diff = ((u32 __force)a->sin_addr.s_addr -
							(u32 __force)b->sin_addr.s_addr);
					if (diff == 0)
						goto found;
				}
			}
		}

		server = NULL;
	found:
		if (server && !atomic_inc_not_zero(&server->usage))
			server = NULL;

	} while (need_seqretry(&net->fs_addr_lock, seq));

	done_seqretry(&net->fs_addr_lock, seq);

	rcu_read_unlock();
	return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 0;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		if (server)
			afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
		server = NULL;

		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				afs_get_server(server, afs_server_trace_get_by_uuid);
				break;
			}

			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
					     struct afs_server *candidate)
{
	const struct afs_addr_list *alist;
	struct afs_server *server;
	struct rb_node **pp, *p;
	int diff;

	_enter("%p", candidate);

	write_seqlock(&net->fs_lock);

	/* Firstly install the server in the UUID lookup tree */
	pp = &net->fs_servers.rb_node;
	p = NULL;
	while (*pp) {
		p = *pp;
		_debug("- consider %p", p);
		server = rb_entry(p, struct afs_server, uuid_rb);
		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
		if (diff < 0)
			pp = &(*pp)->rb_left;
		else if (diff > 0)
			pp = &(*pp)->rb_right;
		else
			goto exists;
	}

	server = candidate;
	rb_link_node(&server->uuid_rb, p, pp);
	rb_insert_color(&server->uuid_rb, &net->fs_servers);
	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

	write_seqlock(&net->fs_addr_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&net->fs_addr_lock.lock));

	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
	 * it in the IPv4 and/or IPv6 reverse-map lists.
	 *
	 * TODO: For speed we want to use something other than a flat list
	 * here; even sorting the list in terms of lowest address would help a
	 * bit, but anything we might want to do gets messy and memory
	 * intensive.
	 */
	if (alist->nr_ipv4 > 0)
		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
	if (alist->nr_addrs > alist->nr_ipv4)
		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

	write_sequnlock(&net->fs_addr_lock);

exists:
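	/* Whether we found an existing record or just installed the
	 * candidate, return it with a ref for the caller.
	 */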
	afs_get_server(server, afs_server_trace_get_install);
	write_sequnlock(&net->fs_lock);
	return server;
}

/*
 * Allocate a new server record.
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
					   const uuid_t *uuid,
					   struct afs_addr_list *alist)
{
	struct afs_server *server;

	_enter("");

	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
	if (!server)
		goto enomem;

	atomic_set(&server->usage, 1);
	server->debug_id = atomic_inc_return(&afs_server_debug_id);
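	/* The new record takes over the caller's ref on the address list. */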
	RCU_INIT_POINTER(server->addresses, alist);
	server->addr_version = alist->version;
	server->uuid = *uuid;
	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	rwlock_init(&server->fs_lock);
	INIT_HLIST_HEAD(&server->cb_volumes);
	rwlock_init(&server->cb_break_lock);
	init_waitqueue_head(&server->probe_wq);
	spin_lock_init(&server->probe_lock);

	afs_inc_servers_outstanding(net);
	trace_afs_server(server, 1, afs_server_trace_alloc);
	_leave(" = %p", server);
	return server;

enomem:
	_leave(" = NULL [nomem]");
	return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_vl_cursor vc;
	struct afs_addr_list *alist = NULL;
	int ret;

	ret = -ERESTARTSYS;
	if (afs_begin_vlserver_operation(&vc, cell, key)) {
		while (afs_select_vlserver(&vc)) {
			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
				alist = afs_yfsvl_get_endpoints(&vc, uuid);
			else
				alist = afs_vl_get_addrs_u(&vc, uuid);
		}

		ret = afs_end_vlserver_operation(&vc);
	}

	return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
				     const uuid_t *uuid)
{
	struct afs_addr_list *alist;
	struct afs_server *server, *candidate;

	_enter("%p,%pU", cell->net, uuid);

	server = afs_find_server_by_uuid(cell->net, uuid);
	if (server)
		return server;

	alist = afs_vl_lookup_addrs(cell, key, uuid);
	if (IS_ERR(alist))
		return ERR_CAST(alist);

	candidate = afs_alloc_server(cell->net, uuid, alist);
	if (!candidate) {
		afs_put_addrlist(alist);
		return ERR_PTR(-ENOMEM);
	}

	server = afs_install_server(cell->net, candidate);
	if (server != candidate) {
		afs_put_addrlist(alist);
		kfree(candidate);
	}

	_leave(" = %p{%d}", server, atomic_read(&server->usage));
	return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
	if (net->live) {
		afs_inc_servers_outstanding(net);
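		/* If the timer was already pending, it holds a count on
		 * servers_outstanding itself, so drop the one we just took.
		 */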
		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
			afs_dec_servers_outstanding(net);
	}
}

/*
 * Server management timer.  We have an increment on net->servers_outstanding
 * that we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

	_enter("");
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Get a reference on a server object.
 */
struct afs_server *afs_get_server(struct afs_server *server,
				  enum afs_server_trace reason)
{
	unsigned int u = atomic_inc_return(&server->usage);

	trace_afs_server(server, u, reason);
	return server;
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server,
		    enum afs_server_trace reason)
{
	unsigned int usage;

	if (!server)
		return;

	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	trace_afs_server(server, usage, reason);

	if (likely(usage > 0))
		return;

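	/* The last ref just went away; get the manager to garbage collect
	 * this record once the grace period has passed.
	 */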
	afs_set_server_timer(net, afs_server_gc_delay);
}

static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_free);
	afs_put_addrlist(rcu_access_pointer(server->addresses));
	kfree(server);
}

/*
 * Destroy a dead server.
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.index	= alist->preferred,
		.error	= 0,
	};

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_give_up_cb);

	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

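	/* Wait for any outstanding probe RPCs to complete before freeing. */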
	wait_var_event(&server->probe_outstanding,
		       atomic_read(&server->probe_outstanding) == 0);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_destroy);
	call_rcu(&server->rcu, afs_server_rcu);
	afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

		write_seqlock(&net->fs_lock);
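		/* Only take down the record if no one regained a ref on it
		 * whilst it was sitting on the gc list.
		 */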
		usage = 1;
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		trace_afs_server(server, usage, afs_server_trace_gc);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted) {
			write_seqlock(&net->fs_addr_lock);
			if (!hlist_unhashed(&server->addr4_link))
				hlist_del_rcu(&server->addr4_link);
			if (!hlist_unhashed(&server->addr6_link))
				hlist_del_rcu(&server->addr6_link);
			write_sequnlock(&net->fs_addr_lock);
			afs_destroy_server(net, server);
		}
	}
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that whoever queued us gave us an increment on
 * net->servers_outstanding that we must deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out.  We have to pass an increment on
	 * servers_outstanding in this namespace to whichever of the timer or
	 * the work item takes over from us.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}

static void afs_queue_server_manager(struct afs_net *net)
{
	afs_inc_servers_outstanding(net);
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

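	/* If the timer was armed, it holds a count on servers_outstanding
	 * that we have to drop ourselves.
	 */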
	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       !atomic_read(&net->servers_outstanding));
	_leave("");
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
		if ((PTR_ERR(alist) == -ERESTARTSYS ||
		     PTR_ERR(alist) == -EINTR) &&
		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
		    server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->error);
		return false;
	}

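	/* Swap in the new list if the version changed; otherwise just
	 * discard it and keep the old one.
	 */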
	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	time64_t now = ktime_get_real_seconds();
	long diff;
	bool success;
	int ret, retries = 0;

	_enter("");

	ASSERT(server);

retry:
	diff = READ_ONCE(server->update_at) - now;
	if (diff > 0) {
		_leave(" = t [not now %ld]", diff);
		return true;
	}

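	/* Only one caller gets to do the update; anyone else waits for it to
	 * finish and then checks the record again.
	 */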
	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
		success = afs_update_server_record(fc, server);
		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
		_leave(" = %d", success);
		return success;
	}

	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = ret;
		_leave(" = f [intr]");
		return false;
	}

	retries++;
	if (retries == 4) {
		fc->error = -ESTALE;
		_leave(" = f [stale]");
		return false;
	}
	goto retry;
}