xref: /openbmc/linux/fs/afs/server.c (revision 6f69e2a3)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13 
14 static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
15 static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
16 static atomic_t afs_server_debug_id;
17 
18 static void afs_inc_servers_outstanding(struct afs_net *net)
19 {
20 	atomic_inc(&net->servers_outstanding);
21 }
22 
23 static void afs_dec_servers_outstanding(struct afs_net *net)
24 {
25 	if (atomic_dec_and_test(&net->servers_outstanding))
26 		wake_up_var(&net->servers_outstanding);
27 }
28 
29 /*
30  * Find a server by one of its addresses.
31  */
32 struct afs_server *afs_find_server(struct afs_net *net,
33 				   const struct sockaddr_rxrpc *srx)
34 {
35 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
36 	const struct afs_addr_list *alist;
37 	struct afs_server *server = NULL;
38 	unsigned int i;
39 	bool ipv6 = true;
40 	int seq = 0, diff;
41 
42 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
43 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
44 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
45 		ipv6 = false;
46 
47 	rcu_read_lock();
48 
49 	do {
50 		if (server)
51 			afs_put_server(net, server, afs_server_trace_put_find_rsq);
52 		server = NULL;
53 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
54 
55 		if (ipv6) {
56 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
57 				alist = rcu_dereference(server->addresses);
58 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
59 					b = &alist->addrs[i].transport.sin6;
60 					diff = ((u16 __force)a->sin6_port -
61 						(u16 __force)b->sin6_port);
62 					if (diff == 0)
63 						diff = memcmp(&a->sin6_addr,
64 							      &b->sin6_addr,
65 							      sizeof(struct in6_addr));
66 					if (diff == 0)
67 						goto found;
68 				}
69 			}
70 		} else {
71 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
72 				alist = rcu_dereference(server->addresses);
73 				for (i = 0; i < alist->nr_ipv4; i++) {
74 					b = &alist->addrs[i].transport.sin6;
75 					diff = ((u16 __force)a->sin6_port -
76 						(u16 __force)b->sin6_port);
77 					if (diff == 0)
78 						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
79 							(u32 __force)b->sin6_addr.s6_addr32[3]);
80 					if (diff == 0)
81 						goto found;
82 				}
83 			}
84 		}
85 
86 		server = NULL;
87 	found:
88 		if (server && !atomic_inc_not_zero(&server->usage))
89 			server = NULL;
90 
91 	} while (need_seqretry(&net->fs_addr_lock, seq));
92 
93 	done_seqretry(&net->fs_addr_lock, seq);
94 
95 	rcu_read_unlock();
96 	return server;
97 }
98 
99 /*
100  * Look up a server by its UUID
101  */
102 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
103 {
104 	struct afs_server *server = NULL;
105 	struct rb_node *p;
106 	int diff, seq = 0;
107 
108 	_enter("%pU", uuid);
109 
110 	do {
111 		/* Unfortunately, rbtree walking doesn't give reliable results
112 		 * under just the RCU read lock, so we have to check for
113 		 * changes.
114 		 */
115 		if (server)
116 			afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
117 		server = NULL;
118 
119 		read_seqbegin_or_lock(&net->fs_lock, &seq);
120 
121 		p = net->fs_servers.rb_node;
122 		while (p) {
123 			server = rb_entry(p, struct afs_server, uuid_rb);
124 
125 			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
126 			if (diff < 0) {
127 				p = p->rb_left;
128 			} else if (diff > 0) {
129 				p = p->rb_right;
130 			} else {
131 				afs_get_server(server, afs_server_trace_get_by_uuid);
132 				break;
133 			}
134 
135 			server = NULL;
136 		}
137 	} while (need_seqretry(&net->fs_lock, seq));
138 
139 	done_seqretry(&net->fs_lock, seq);
140 
141 	_leave(" = %p", server);
142 	return server;
143 }
144 
145 /*
146  * Install a server record in the namespace tree
147  */
148 static struct afs_server *afs_install_server(struct afs_net *net,
149 					     struct afs_server *candidate)
150 {
151 	const struct afs_addr_list *alist;
152 	struct afs_server *server;
153 	struct rb_node **pp, *p;
154 	int diff;
155 
156 	_enter("%p", candidate);
157 
158 	write_seqlock(&net->fs_lock);
159 
160 	/* Firstly install the server in the UUID lookup tree */
161 	pp = &net->fs_servers.rb_node;
162 	p = NULL;
163 	while (*pp) {
164 		p = *pp;
165 		_debug("- consider %p", p);
166 		server = rb_entry(p, struct afs_server, uuid_rb);
167 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
168 		if (diff < 0)
169 			pp = &(*pp)->rb_left;
170 		else if (diff > 0)
171 			pp = &(*pp)->rb_right;
172 		else
173 			goto exists;
174 	}
175 
176 	server = candidate;
177 	rb_link_node(&server->uuid_rb, p, pp);
178 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
179 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
180 
181 	write_seqlock(&net->fs_addr_lock);
182 	alist = rcu_dereference_protected(server->addresses,
183 					  lockdep_is_held(&net->fs_addr_lock.lock));
184 
185 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
186 	 * it in the IPv4 and/or IPv6 reverse-map lists.
187 	 *
188 	 * TODO: For speed we want to use something other than a flat list
189 	 * here; even sorting the list in terms of lowest address would help a
190 	 * bit, but anything we might want to do gets messy and memory
191 	 * intensive.
192 	 */
193 	if (alist->nr_ipv4 > 0)
194 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
195 	if (alist->nr_addrs > alist->nr_ipv4)
196 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
197 
198 	write_sequnlock(&net->fs_addr_lock);
199 
200 exists:
201 	afs_get_server(server, afs_server_trace_get_install);
202 	write_sequnlock(&net->fs_lock);
203 	return server;
204 }
205 
206 /*
207  * allocate a new server record
208  */
209 static struct afs_server *afs_alloc_server(struct afs_net *net,
210 					   const uuid_t *uuid,
211 					   struct afs_addr_list *alist)
212 {
213 	struct afs_server *server;
214 
215 	_enter("");
216 
217 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
218 	if (!server)
219 		goto enomem;
220 
221 	atomic_set(&server->usage, 1);
222 	server->debug_id = atomic_inc_return(&afs_server_debug_id);
223 	RCU_INIT_POINTER(server->addresses, alist);
224 	server->addr_version = alist->version;
225 	server->uuid = *uuid;
226 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
227 	rwlock_init(&server->fs_lock);
228 	INIT_HLIST_HEAD(&server->cb_volumes);
229 	rwlock_init(&server->cb_break_lock);
230 	init_waitqueue_head(&server->probe_wq);
231 	spin_lock_init(&server->probe_lock);
232 
233 	afs_inc_servers_outstanding(net);
234 	trace_afs_server(server, 1, afs_server_trace_alloc);
235 	_leave(" = %p", server);
236 	return server;
237 
238 enomem:
239 	_leave(" = NULL [nomem]");
240 	return NULL;
241 }
242 
243 /*
244  * Look up an address record for a server
245  */
246 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
247 						 struct key *key, const uuid_t *uuid)
248 {
249 	struct afs_vl_cursor vc;
250 	struct afs_addr_list *alist = NULL;
251 	int ret;
252 
253 	ret = -ERESTARTSYS;
254 	if (afs_begin_vlserver_operation(&vc, cell, key)) {
255 		while (afs_select_vlserver(&vc)) {
256 			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
257 				alist = afs_yfsvl_get_endpoints(&vc, uuid);
258 			else
259 				alist = afs_vl_get_addrs_u(&vc, uuid);
260 		}
261 
262 		ret = afs_end_vlserver_operation(&vc);
263 	}
264 
265 	return ret < 0 ? ERR_PTR(ret) : alist;
266 }
267 
268 /*
269  * Get or create a fileserver record.
270  */
271 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
272 				     const uuid_t *uuid)
273 {
274 	struct afs_addr_list *alist;
275 	struct afs_server *server, *candidate;
276 
277 	_enter("%p,%pU", cell->net, uuid);
278 
279 	server = afs_find_server_by_uuid(cell->net, uuid);
280 	if (server)
281 		return server;
282 
283 	alist = afs_vl_lookup_addrs(cell, key, uuid);
284 	if (IS_ERR(alist))
285 		return ERR_CAST(alist);
286 
287 	candidate = afs_alloc_server(cell->net, uuid, alist);
288 	if (!candidate) {
289 		afs_put_addrlist(alist);
290 		return ERR_PTR(-ENOMEM);
291 	}
292 
293 	server = afs_install_server(cell->net, candidate);
294 	if (server != candidate) {
295 		afs_put_addrlist(alist);
296 		kfree(candidate);
297 	}
298 
299 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
300 	return server;
301 }
302 
303 /*
304  * Set the server timer to fire after a given delay, assuming it's not already
305  * set for an earlier time.
306  */
307 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
308 {
309 	if (net->live) {
310 		afs_inc_servers_outstanding(net);
311 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
312 			afs_dec_servers_outstanding(net);
313 	}
314 }
315 
316 /*
317  * Server management timer.  We have an increment on fs_outstanding that we
318  * need to pass along to the work item.
319  */
320 void afs_servers_timer(struct timer_list *timer)
321 {
322 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
323 
324 	_enter("");
325 	if (!queue_work(afs_wq, &net->fs_manager))
326 		afs_dec_servers_outstanding(net);
327 }
328 
329 /*
330  * Get a reference on a server object.
331  */
332 struct afs_server *afs_get_server(struct afs_server *server,
333 				  enum afs_server_trace reason)
334 {
335 	unsigned int u = atomic_inc_return(&server->usage);
336 
337 	trace_afs_server(server, u, reason);
338 	return server;
339 }
340 
341 /*
342  * Release a reference on a server record.
343  */
344 void afs_put_server(struct afs_net *net, struct afs_server *server,
345 		    enum afs_server_trace reason)
346 {
347 	unsigned int usage;
348 
349 	if (!server)
350 		return;
351 
352 	server->put_time = ktime_get_real_seconds();
353 
354 	usage = atomic_dec_return(&server->usage);
355 
356 	trace_afs_server(server, usage, reason);
357 
358 	if (likely(usage > 0))
359 		return;
360 
361 	afs_set_server_timer(net, afs_server_gc_delay);
362 }
363 
364 static void afs_server_rcu(struct rcu_head *rcu)
365 {
366 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
367 
368 	trace_afs_server(server, atomic_read(&server->usage),
369 			 afs_server_trace_free);
370 	afs_put_addrlist(rcu_access_pointer(server->addresses));
371 	kfree(server);
372 }
373 
374 /*
375  * destroy a dead server
376  */
377 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
378 {
379 	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
380 	struct afs_addr_cursor ac = {
381 		.alist	= alist,
382 		.index	= alist->preferred,
383 		.error	= 0,
384 	};
385 
386 	trace_afs_server(server, atomic_read(&server->usage),
387 			 afs_server_trace_give_up_cb);
388 
389 	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
390 		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
391 
392 	wait_var_event(&server->probe_outstanding,
393 		       atomic_read(&server->probe_outstanding) == 0);
394 
395 	trace_afs_server(server, atomic_read(&server->usage),
396 			 afs_server_trace_destroy);
397 	call_rcu(&server->rcu, afs_server_rcu);
398 	afs_dec_servers_outstanding(net);
399 }
400 
401 /*
402  * Garbage collect any expired servers.
403  */
404 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
405 {
406 	struct afs_server *server;
407 	bool deleted;
408 	int usage;
409 
410 	while ((server = gc_list)) {
411 		gc_list = server->gc_next;
412 
413 		write_seqlock(&net->fs_lock);
414 		usage = 1;
415 		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
416 		trace_afs_server(server, usage, afs_server_trace_gc);
417 		if (deleted) {
418 			rb_erase(&server->uuid_rb, &net->fs_servers);
419 			hlist_del_rcu(&server->proc_link);
420 		}
421 		write_sequnlock(&net->fs_lock);
422 
423 		if (deleted) {
424 			write_seqlock(&net->fs_addr_lock);
425 			if (!hlist_unhashed(&server->addr4_link))
426 				hlist_del_rcu(&server->addr4_link);
427 			if (!hlist_unhashed(&server->addr6_link))
428 				hlist_del_rcu(&server->addr6_link);
429 			write_sequnlock(&net->fs_addr_lock);
430 			afs_destroy_server(net, server);
431 		}
432 	}
433 }
434 
435 /*
436  * Manage the records of servers known to be within a network namespace.  This
437  * includes garbage collecting unused servers.
438  *
439  * Note also that we were given an increment on net->servers_outstanding by
440  * whoever queued us that we need to deal with before returning.
441  */
442 void afs_manage_servers(struct work_struct *work)
443 {
444 	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
445 	struct afs_server *gc_list = NULL;
446 	struct rb_node *cursor;
447 	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
448 	bool purging = !net->live;
449 
450 	_enter("");
451 
452 	/* Trawl the server list looking for servers that have expired from
453 	 * lack of use.
454 	 */
455 	read_seqlock_excl(&net->fs_lock);
456 
457 	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
458 		struct afs_server *server =
459 			rb_entry(cursor, struct afs_server, uuid_rb);
460 		int usage = atomic_read(&server->usage);
461 
462 		_debug("manage %pU %u", &server->uuid, usage);
463 
464 		ASSERTCMP(usage, >=, 1);
465 		ASSERTIFCMP(purging, usage, ==, 1);
466 
467 		if (usage == 1) {
468 			time64_t expire_at = server->put_time;
469 
470 			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
471 			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
472 				expire_at += afs_server_gc_delay;
473 			if (purging || expire_at <= now) {
474 				server->gc_next = gc_list;
475 				gc_list = server;
476 			} else if (expire_at < next_manage) {
477 				next_manage = expire_at;
478 			}
479 		}
480 	}
481 
482 	read_sequnlock_excl(&net->fs_lock);
483 
484 	/* Update the timer on the way out.  We have to pass an increment on
485 	 * servers_outstanding in the namespace that we are in to the timer or
486 	 * the work scheduler.
487 	 */
488 	if (!purging && next_manage < TIME64_MAX) {
489 		now = ktime_get_real_seconds();
490 
491 		if (next_manage - now <= 0) {
492 			if (queue_work(afs_wq, &net->fs_manager))
493 				afs_inc_servers_outstanding(net);
494 		} else {
495 			afs_set_server_timer(net, next_manage - now);
496 		}
497 	}
498 
499 	afs_gc_servers(net, gc_list);
500 
501 	afs_dec_servers_outstanding(net);
502 	_leave(" [%d]", atomic_read(&net->servers_outstanding));
503 }
504 
505 static void afs_queue_server_manager(struct afs_net *net)
506 {
507 	afs_inc_servers_outstanding(net);
508 	if (!queue_work(afs_wq, &net->fs_manager))
509 		afs_dec_servers_outstanding(net);
510 }
511 
512 /*
513  * Purge list of servers.
514  */
515 void afs_purge_servers(struct afs_net *net)
516 {
517 	_enter("");
518 
519 	if (del_timer_sync(&net->fs_timer))
520 		atomic_dec(&net->servers_outstanding);
521 
522 	afs_queue_server_manager(net);
523 
524 	_debug("wait");
525 	wait_var_event(&net->servers_outstanding,
526 		       !atomic_read(&net->servers_outstanding));
527 	_leave("");
528 }
529 
530 /*
531  * Get an update for a server's address list.
532  */
533 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
534 {
535 	struct afs_addr_list *alist, *discard;
536 
537 	_enter("");
538 
539 	trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
540 
541 	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
542 				    &server->uuid);
543 	if (IS_ERR(alist)) {
544 		if ((PTR_ERR(alist) == -ERESTARTSYS ||
545 		     PTR_ERR(alist) == -EINTR) &&
546 		    !(fc->flags & AFS_FS_CURSOR_INTR) &&
547 		    server->addresses) {
548 			_leave(" = t [intr]");
549 			return true;
550 		}
551 		fc->error = PTR_ERR(alist);
552 		_leave(" = f [%d]", fc->error);
553 		return false;
554 	}
555 
556 	discard = alist;
557 	if (server->addr_version != alist->version) {
558 		write_lock(&server->fs_lock);
559 		discard = rcu_dereference_protected(server->addresses,
560 						    lockdep_is_held(&server->fs_lock));
561 		rcu_assign_pointer(server->addresses, alist);
562 		server->addr_version = alist->version;
563 		write_unlock(&server->fs_lock);
564 	}
565 
566 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
567 	afs_put_addrlist(discard);
568 	_leave(" = t");
569 	return true;
570 }
571 
572 /*
573  * See if a server's address list needs updating.
574  */
575 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
576 {
577 	time64_t now = ktime_get_real_seconds();
578 	long diff;
579 	bool success;
580 	int ret, retries = 0;
581 
582 	_enter("");
583 
584 	ASSERT(server);
585 
586 retry:
587 	diff = READ_ONCE(server->update_at) - now;
588 	if (diff > 0) {
589 		_leave(" = t [not now %ld]", diff);
590 		return true;
591 	}
592 
593 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
594 		success = afs_update_server_record(fc, server);
595 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
596 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
597 		_leave(" = %d", success);
598 		return success;
599 	}
600 
601 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
602 			  TASK_INTERRUPTIBLE);
603 	if (ret == -ERESTARTSYS) {
604 		if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
605 			_leave(" = t [intr]");
606 			return true;
607 		}
608 		fc->error = ret;
609 		_leave(" = f [intr]");
610 		return false;
611 	}
612 
613 	retries++;
614 	if (retries == 4) {
615 		_leave(" = f [stale]");
616 		ret = -ESTALE;
617 		return false;
618 	}
619 	goto retry;
620 }
621