xref: /openbmc/linux/fs/afs/server.c (revision 0a671dc5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13 
/* File-scope tunables and the ID source used when allocating server records. */
static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
static atomic_t afs_server_debug_id;		/* Incremented to give each new record its debug_id */
17 
/*
 * Take a count on net->servers_outstanding.  afs_purge_servers() waits for
 * this counter to reach zero, so every increment must be matched by a call to
 * afs_dec_servers_outstanding().
 */
static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}
22 
23 static void afs_dec_servers_outstanding(struct afs_net *net)
24 {
25 	if (atomic_dec_and_test(&net->servers_outstanding))
26 		wake_up_var(&net->servers_outstanding);
27 }
28 
29 /*
30  * Find a server by one of its addresses.
31  */
32 struct afs_server *afs_find_server(struct afs_net *net,
33 				   const struct sockaddr_rxrpc *srx)
34 {
35 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
36 	const struct afs_addr_list *alist;
37 	struct afs_server *server = NULL;
38 	unsigned int i;
39 	bool ipv6 = true;
40 	int seq = 0, diff;
41 
42 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
43 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
44 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
45 		ipv6 = false;
46 
47 	rcu_read_lock();
48 
49 	do {
50 		if (server)
51 			afs_put_server(net, server, afs_server_trace_put_find_rsq);
52 		server = NULL;
53 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
54 
55 		if (ipv6) {
56 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
57 				alist = rcu_dereference(server->addresses);
58 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
59 					b = &alist->addrs[i].transport.sin6;
60 					diff = ((u16 __force)a->sin6_port -
61 						(u16 __force)b->sin6_port);
62 					if (diff == 0)
63 						diff = memcmp(&a->sin6_addr,
64 							      &b->sin6_addr,
65 							      sizeof(struct in6_addr));
66 					if (diff == 0)
67 						goto found;
68 				}
69 			}
70 		} else {
71 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
72 				alist = rcu_dereference(server->addresses);
73 				for (i = 0; i < alist->nr_ipv4; i++) {
74 					b = &alist->addrs[i].transport.sin6;
75 					diff = ((u16 __force)a->sin6_port -
76 						(u16 __force)b->sin6_port);
77 					if (diff == 0)
78 						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
79 							(u32 __force)b->sin6_addr.s6_addr32[3]);
80 					if (diff == 0)
81 						goto found;
82 				}
83 			}
84 		}
85 
86 		server = NULL;
87 	found:
88 		if (server && !atomic_inc_not_zero(&server->usage))
89 			server = NULL;
90 
91 	} while (need_seqretry(&net->fs_addr_lock, seq));
92 
93 	done_seqretry(&net->fs_addr_lock, seq);
94 
95 	rcu_read_unlock();
96 	return server;
97 }
98 
/*
 * Look up a server by its UUID
 *
 * Walks the net->fs_servers rbtree, which is ordered by memcmp() of the UUID.
 * The walk is repeated if net->fs_lock indicates that the tree was modified
 * while it was being searched.
 *
 * Returns the matching server with a reference taken on it, or NULL if there
 * is no record with that UUID.
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 0;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		/* Drop the ref taken on a pass that now has to be repeated. */
		if (server)
			afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
		server = NULL;

		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				/* Match: leave the loop with server set. */
				afs_get_server(server, afs_server_trace_get_by_uuid);
				break;
			}

			/* Not this node; make sure we return NULL if we run
			 * out of tree.
			 */
			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}
144 
145 /*
146  * Install a server record in the namespace tree
147  */
148 static struct afs_server *afs_install_server(struct afs_net *net,
149 					     struct afs_server *candidate)
150 {
151 	const struct afs_addr_list *alist;
152 	struct afs_server *server;
153 	struct rb_node **pp, *p;
154 	int ret = -EEXIST, diff;
155 
156 	_enter("%p", candidate);
157 
158 	write_seqlock(&net->fs_lock);
159 
160 	/* Firstly install the server in the UUID lookup tree */
161 	pp = &net->fs_servers.rb_node;
162 	p = NULL;
163 	while (*pp) {
164 		p = *pp;
165 		_debug("- consider %p", p);
166 		server = rb_entry(p, struct afs_server, uuid_rb);
167 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
168 		if (diff < 0)
169 			pp = &(*pp)->rb_left;
170 		else if (diff > 0)
171 			pp = &(*pp)->rb_right;
172 		else
173 			goto exists;
174 	}
175 
176 	server = candidate;
177 	rb_link_node(&server->uuid_rb, p, pp);
178 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
179 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
180 
181 	write_seqlock(&net->fs_addr_lock);
182 	alist = rcu_dereference_protected(server->addresses,
183 					  lockdep_is_held(&net->fs_addr_lock.lock));
184 
185 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
186 	 * it in the IPv4 and/or IPv6 reverse-map lists.
187 	 *
188 	 * TODO: For speed we want to use something other than a flat list
189 	 * here; even sorting the list in terms of lowest address would help a
190 	 * bit, but anything we might want to do gets messy and memory
191 	 * intensive.
192 	 */
193 	if (alist->nr_ipv4 > 0)
194 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
195 	if (alist->nr_addrs > alist->nr_ipv4)
196 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
197 
198 	write_sequnlock(&net->fs_addr_lock);
199 	ret = 0;
200 
201 exists:
202 	afs_get_server(server, afs_server_trace_get_install);
203 	write_sequnlock(&net->fs_lock);
204 	return server;
205 }
206 
207 /*
208  * allocate a new server record
209  */
210 static struct afs_server *afs_alloc_server(struct afs_net *net,
211 					   const uuid_t *uuid,
212 					   struct afs_addr_list *alist)
213 {
214 	struct afs_server *server;
215 
216 	_enter("");
217 
218 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
219 	if (!server)
220 		goto enomem;
221 
222 	atomic_set(&server->usage, 1);
223 	server->debug_id = atomic_inc_return(&afs_server_debug_id);
224 	RCU_INIT_POINTER(server->addresses, alist);
225 	server->addr_version = alist->version;
226 	server->uuid = *uuid;
227 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
228 	rwlock_init(&server->fs_lock);
229 	INIT_HLIST_HEAD(&server->cb_volumes);
230 	rwlock_init(&server->cb_break_lock);
231 	init_waitqueue_head(&server->probe_wq);
232 	spin_lock_init(&server->probe_lock);
233 
234 	afs_inc_servers_outstanding(net);
235 	trace_afs_server(server, 1, afs_server_trace_alloc);
236 	_leave(" = %p", server);
237 	return server;
238 
239 enomem:
240 	_leave(" = NULL [nomem]");
241 	return NULL;
242 }
243 
244 /*
245  * Look up an address record for a server
246  */
247 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
248 						 struct key *key, const uuid_t *uuid)
249 {
250 	struct afs_vl_cursor vc;
251 	struct afs_addr_list *alist = NULL;
252 	int ret;
253 
254 	ret = -ERESTARTSYS;
255 	if (afs_begin_vlserver_operation(&vc, cell, key)) {
256 		while (afs_select_vlserver(&vc)) {
257 			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
258 				alist = afs_yfsvl_get_endpoints(&vc, uuid);
259 			else
260 				alist = afs_vl_get_addrs_u(&vc, uuid);
261 		}
262 
263 		ret = afs_end_vlserver_operation(&vc);
264 	}
265 
266 	return ret < 0 ? ERR_PTR(ret) : alist;
267 }
268 
269 /*
270  * Get or create a fileserver record.
271  */
272 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
273 				     const uuid_t *uuid)
274 {
275 	struct afs_addr_list *alist;
276 	struct afs_server *server, *candidate;
277 
278 	_enter("%p,%pU", cell->net, uuid);
279 
280 	server = afs_find_server_by_uuid(cell->net, uuid);
281 	if (server)
282 		return server;
283 
284 	alist = afs_vl_lookup_addrs(cell, key, uuid);
285 	if (IS_ERR(alist))
286 		return ERR_CAST(alist);
287 
288 	candidate = afs_alloc_server(cell->net, uuid, alist);
289 	if (!candidate) {
290 		afs_put_addrlist(alist);
291 		return ERR_PTR(-ENOMEM);
292 	}
293 
294 	server = afs_install_server(cell->net, candidate);
295 	if (server != candidate) {
296 		afs_put_addrlist(alist);
297 		kfree(candidate);
298 	}
299 
300 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
301 	return server;
302 }
303 
304 /*
305  * Set the server timer to fire after a given delay, assuming it's not already
306  * set for an earlier time.
307  */
308 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
309 {
310 	if (net->live) {
311 		afs_inc_servers_outstanding(net);
312 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
313 			afs_dec_servers_outstanding(net);
314 	}
315 }
316 
317 /*
318  * Server management timer.  We have an increment on fs_outstanding that we
319  * need to pass along to the work item.
320  */
321 void afs_servers_timer(struct timer_list *timer)
322 {
323 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
324 
325 	_enter("");
326 	if (!queue_work(afs_wq, &net->fs_manager))
327 		afs_dec_servers_outstanding(net);
328 }
329 
330 /*
331  * Get a reference on a server object.
332  */
333 struct afs_server *afs_get_server(struct afs_server *server,
334 				  enum afs_server_trace reason)
335 {
336 	unsigned int u = atomic_inc_return(&server->usage);
337 
338 	trace_afs_server(server, u, reason);
339 	return server;
340 }
341 
/*
 * Release a reference on a server record.
 *
 * A NULL @server is ignored.  The record is not freed here: the time of the
 * put is noted in server->put_time and, if the usage count has dropped to
 * zero, the server management timer is set to go off after
 * afs_server_gc_delay seconds.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server,
		    enum afs_server_trace reason)
{
	unsigned int usage;

	if (!server)
		return;

	/* Noted before the count is dropped; afs_manage_servers() works out
	 * the expiry time from this.
	 */
	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	trace_afs_server(server, usage, reason);

	if (likely(usage > 0))
		return;

	afs_set_server_timer(net, afs_server_gc_delay);
}
364 
/*
 * RCU callback, queued by afs_destroy_server(), that finally frees a server
 * record: the record's reference on its address list is dropped and then the
 * record itself is freed.
 */
static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_free);
	afs_put_addrlist(rcu_access_pointer(server->addresses));
	kfree(server);
}
374 
/*
 * destroy a dead server
 *
 * Called from afs_gc_servers() once the record has been removed from the
 * lookup structures.  Gives up any callbacks the server may hold for us,
 * waits for outstanding probes to finish, queues the record for freeing
 * after an RCU grace period and drops the record's count on
 * net->servers_outstanding.
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
	/* Address cursor starting at the list's preferred address. */
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.index	= alist->preferred,
		.error	= 0,
	};

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_give_up_cb);

	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

	/* Don't free the record whilst probes are still in progress. */
	wait_var_event(&server->probe_outstanding,
		       atomic_read(&server->probe_outstanding) == 0);

	trace_afs_server(server, atomic_read(&server->usage),
			 afs_server_trace_destroy);
	call_rcu(&server->rcu, afs_server_rcu);
	/* Balances the increment made in afs_alloc_server(). */
	afs_dec_servers_outstanding(net);
}
401 
/*
 * Garbage collect any expired servers.
 *
 * @gc_list is a list of candidate records chained through ->gc_next, as
 * built by afs_manage_servers().  A candidate is only removed and destroyed
 * if its usage count can be atomically changed from 1 to 0; if the count has
 * changed since the candidate was selected, the record is left alone.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

		write_seqlock(&net->fs_lock);
		usage = 1;
		/* On failure, usage is updated to the count actually seen. */
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		trace_afs_server(server, usage, afs_server_trace_gc);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted) {
			/* The record is only on an address list if it had
			 * addresses of that type when it was installed.
			 */
			write_seqlock(&net->fs_addr_lock);
			if (!hlist_unhashed(&server->addr4_link))
				hlist_del_rcu(&server->addr4_link);
			if (!hlist_unhashed(&server->addr6_link))
				hlist_del_rcu(&server->addr6_link);
			write_sequnlock(&net->fs_addr_lock);
			afs_destroy_server(net, server);
		}
	}
}
435 
/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 *
 * A record is considered unused when its usage count is 1.  An unused record
 * expires afs_server_gc_delay seconds after it was last put, or immediately
 * if either of the AFS_SERVER_FL_VL_FAIL or AFS_SERVER_FL_NOT_FOUND flags is
 * set on it.  When the namespace is no longer live, every unused record is
 * collected regardless of its expiry time.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

			/* Failed/not-found records get no grace period. */
			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				/* Push onto the list for afs_gc_servers(). */
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				/* Track the earliest future expiry. */
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out.  We have to pass an increment on
	 * servers_outstanding in the namespace that we are in to the timer or
	 * the work scheduler.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			/* Already due: requeue ourselves rather than using
			 * the timer.
			 */
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	/* Drop the increment we were given by whoever queued us. */
	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}
505 
506 static void afs_queue_server_manager(struct afs_net *net)
507 {
508 	afs_inc_servers_outstanding(net);
509 	if (!queue_work(afs_wq, &net->fs_manager))
510 		afs_dec_servers_outstanding(net);
511 }
512 
/*
 * Purge list of servers.
 *
 * Cancels the server management timer, kicks the manager work item and then
 * sleeps until net->servers_outstanding has dropped to zero.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	/* If the timer was pending, it held a count that now has to be
	 * dropped on its behalf.
	 */
	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       !atomic_read(&net->servers_outstanding));
	_leave("");
}
530 
/*
 * Get an update for a server's address list.
 *
 * Looks the server's addresses up again through the VL servers of the cell
 * that the cursor's vnode belongs to.  If the version of the list obtained
 * differs from the one recorded on the server, the new list replaces the old
 * one; otherwise the new list is thrown away.  In both cases the time of the
 * next recheck is pushed back by afs_server_update_delay seconds.
 *
 * Returns true if the caller may carry on using the server and false if the
 * lookup failed, in which case the error is left in fc->error.  An
 * interrupted lookup is tolerated (true is returned) if the cursor does not
 * have AFS_FS_CURSOR_INTR set and the server already has an address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
		if ((PTR_ERR(alist) == -ERESTARTSYS ||
		     PTR_ERR(alist) == -EINTR) &&
		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
		    server->addresses) {
			_leave(" = t [intr]");
			return true;
		}
		fc->error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->error);
		return false;
	}

	/* Assume the new list is the one to be thrown away unless it turns
	 * out to be a different version from the current one.
	 */
	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}
572 
573 /*
574  * See if a server's address list needs updating.
575  */
576 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
577 {
578 	time64_t now = ktime_get_real_seconds();
579 	long diff;
580 	bool success;
581 	int ret, retries = 0;
582 
583 	_enter("");
584 
585 	ASSERT(server);
586 
587 retry:
588 	diff = READ_ONCE(server->update_at) - now;
589 	if (diff > 0) {
590 		_leave(" = t [not now %ld]", diff);
591 		return true;
592 	}
593 
594 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
595 		success = afs_update_server_record(fc, server);
596 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
597 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
598 		_leave(" = %d", success);
599 		return success;
600 	}
601 
602 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
603 			  TASK_INTERRUPTIBLE);
604 	if (ret == -ERESTARTSYS) {
605 		if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
606 			_leave(" = t [intr]");
607 			return true;
608 		}
609 		fc->error = ret;
610 		_leave(" = f [intr]");
611 		return false;
612 	}
613 
614 	retries++;
615 	if (retries == 4) {
616 		_leave(" = f [stale]");
617 		ret = -ESTALE;
618 		return false;
619 	}
620 	goto retry;
621 }
622