xref: /openbmc/linux/fs/afs/server.c (revision d623f60d)
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16 
/* Delay, in seconds, before an unused server record times out. */
static unsigned int afs_server_gc_delay = 10;
/* Interval, in seconds, until a server is rechecked against the VLDB. */
static unsigned int afs_server_update_delay = 30;
19 
20 static void afs_inc_servers_outstanding(struct afs_net *net)
21 {
22 	atomic_inc(&net->servers_outstanding);
23 }
24 
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27 	if (atomic_dec_and_test(&net->servers_outstanding))
28 		wake_up_var(&net->servers_outstanding);
29 }
30 
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35 				   const struct sockaddr_rxrpc *srx)
36 {
37 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38 	const struct afs_addr_list *alist;
39 	struct afs_server *server = NULL;
40 	unsigned int i;
41 	bool ipv6 = true;
42 	int seq = 0, diff;
43 
44 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47 		ipv6 = false;
48 
49 	rcu_read_lock();
50 
51 	do {
52 		if (server)
53 			afs_put_server(net, server);
54 		server = NULL;
55 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56 
57 		if (ipv6) {
58 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59 				alist = rcu_dereference(server->addresses);
60 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61 					b = &alist->addrs[i].transport.sin6;
62 					diff = ((u16 __force)a->sin6_port -
63 						(u16 __force)b->sin6_port);
64 					if (diff == 0)
65 						diff = memcmp(&a->sin6_addr,
66 							      &b->sin6_addr,
67 							      sizeof(struct in6_addr));
68 					if (diff == 0)
69 						goto found;
70 				}
71 			}
72 		} else {
73 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
74 				alist = rcu_dereference(server->addresses);
75 				for (i = 0; i < alist->nr_ipv4; i++) {
76 					b = &alist->addrs[i].transport.sin6;
77 					diff = ((u16 __force)a->sin6_port -
78 						(u16 __force)b->sin6_port);
79 					if (diff == 0)
80 						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
81 							(u32 __force)b->sin6_addr.s6_addr32[3]);
82 					if (diff == 0)
83 						goto found;
84 				}
85 			}
86 		}
87 
88 		server = NULL;
89 	found:
90 		if (server && !atomic_inc_not_zero(&server->usage))
91 			server = NULL;
92 
93 	} while (need_seqretry(&net->fs_addr_lock, seq));
94 
95 	done_seqretry(&net->fs_addr_lock, seq);
96 
97 	rcu_read_unlock();
98 	return server;
99 }
100 
101 /*
102  * Look up a server by its UUID
103  */
104 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
105 {
106 	struct afs_server *server = NULL;
107 	struct rb_node *p;
108 	int diff, seq = 0;
109 
110 	_enter("%pU", uuid);
111 
112 	do {
113 		/* Unfortunately, rbtree walking doesn't give reliable results
114 		 * under just the RCU read lock, so we have to check for
115 		 * changes.
116 		 */
117 		if (server)
118 			afs_put_server(net, server);
119 		server = NULL;
120 
121 		read_seqbegin_or_lock(&net->fs_lock, &seq);
122 
123 		p = net->fs_servers.rb_node;
124 		while (p) {
125 			server = rb_entry(p, struct afs_server, uuid_rb);
126 
127 			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
128 			if (diff < 0) {
129 				p = p->rb_left;
130 			} else if (diff > 0) {
131 				p = p->rb_right;
132 			} else {
133 				afs_get_server(server);
134 				break;
135 			}
136 
137 			server = NULL;
138 		}
139 	} while (need_seqretry(&net->fs_lock, seq));
140 
141 	done_seqretry(&net->fs_lock, seq);
142 
143 	_leave(" = %p", server);
144 	return server;
145 }
146 
147 /*
148  * Install a server record in the namespace tree
149  */
150 static struct afs_server *afs_install_server(struct afs_net *net,
151 					     struct afs_server *candidate)
152 {
153 	const struct afs_addr_list *alist;
154 	struct afs_server *server;
155 	struct rb_node **pp, *p;
156 	int ret = -EEXIST, diff;
157 
158 	_enter("%p", candidate);
159 
160 	write_seqlock(&net->fs_lock);
161 
162 	/* Firstly install the server in the UUID lookup tree */
163 	pp = &net->fs_servers.rb_node;
164 	p = NULL;
165 	while (*pp) {
166 		p = *pp;
167 		_debug("- consider %p", p);
168 		server = rb_entry(p, struct afs_server, uuid_rb);
169 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
170 		if (diff < 0)
171 			pp = &(*pp)->rb_left;
172 		else if (diff > 0)
173 			pp = &(*pp)->rb_right;
174 		else
175 			goto exists;
176 	}
177 
178 	server = candidate;
179 	rb_link_node(&server->uuid_rb, p, pp);
180 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
181 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
182 
183 	write_seqlock(&net->fs_addr_lock);
184 	alist = rcu_dereference_protected(server->addresses,
185 					  lockdep_is_held(&net->fs_addr_lock.lock));
186 
187 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
188 	 * it in the IPv4 and/or IPv6 reverse-map lists.
189 	 *
190 	 * TODO: For speed we want to use something other than a flat list
191 	 * here; even sorting the list in terms of lowest address would help a
192 	 * bit, but anything we might want to do gets messy and memory
193 	 * intensive.
194 	 */
195 	if (alist->nr_ipv4 > 0)
196 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
197 	if (alist->nr_addrs > alist->nr_ipv4)
198 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
199 
200 	write_sequnlock(&net->fs_addr_lock);
201 	ret = 0;
202 
203 exists:
204 	afs_get_server(server);
205 	write_sequnlock(&net->fs_lock);
206 	return server;
207 }
208 
209 /*
210  * allocate a new server record
211  */
212 static struct afs_server *afs_alloc_server(struct afs_net *net,
213 					   const uuid_t *uuid,
214 					   struct afs_addr_list *alist)
215 {
216 	struct afs_server *server;
217 
218 	_enter("");
219 
220 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
221 	if (!server)
222 		goto enomem;
223 
224 	atomic_set(&server->usage, 1);
225 	RCU_INIT_POINTER(server->addresses, alist);
226 	server->addr_version = alist->version;
227 	server->uuid = *uuid;
228 	server->flags = (1UL << AFS_SERVER_FL_NEW);
229 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
230 	rwlock_init(&server->fs_lock);
231 	INIT_HLIST_HEAD(&server->cb_volumes);
232 	rwlock_init(&server->cb_break_lock);
233 
234 	afs_inc_servers_outstanding(net);
235 	_leave(" = %p", server);
236 	return server;
237 
238 enomem:
239 	_leave(" = NULL [nomem]");
240 	return NULL;
241 }
242 
/*
 * Look up the address list for a server, identified by UUID, by asking the
 * cell's VL servers.
 *
 * Each VL address offered by the cursor is tried in turn, using the YFS
 * endpoint query if that address's bit is set in the list's yfs mask and the
 * AFS query otherwise.
 *
 * Returns the address list from the first query that completes without error,
 * or an ERR_PTR() carrying the result of afs_end_cursor().
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		alist = test_bit(ac.index, &ac.alist->yfs) ?
			afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid) :
			afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		if (ac.error == 0) {
			afs_end_cursor(&ac);
			return alist;
		}

		switch (ac.error) {
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* Couldn't reach this address; try the next one. */
			continue;
		case -ENOMEM:
		case -ENONET:
			goto out;
		case -ECONNABORTED:
			/* Translate the abort code into an error. */
			ac.error = afs_abort_to_error(ac.abort_code);
			goto out;
		default:
			ac.error = -EIO;
			goto out;
		}
	}

out:
	return ERR_PTR(afs_end_cursor(&ac));
}
285 
286 /*
287  * Get or create a fileserver record.
288  */
289 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
290 				     const uuid_t *uuid)
291 {
292 	struct afs_addr_list *alist;
293 	struct afs_server *server, *candidate;
294 
295 	_enter("%p,%pU", cell->net, uuid);
296 
297 	server = afs_find_server_by_uuid(cell->net, uuid);
298 	if (server)
299 		return server;
300 
301 	alist = afs_vl_lookup_addrs(cell, key, uuid);
302 	if (IS_ERR(alist))
303 		return ERR_CAST(alist);
304 
305 	candidate = afs_alloc_server(cell->net, uuid, alist);
306 	if (!candidate) {
307 		afs_put_addrlist(alist);
308 		return ERR_PTR(-ENOMEM);
309 	}
310 
311 	server = afs_install_server(cell->net, candidate);
312 	if (server != candidate) {
313 		afs_put_addrlist(alist);
314 		kfree(candidate);
315 	}
316 
317 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
318 	return server;
319 }
320 
321 /*
322  * Set the server timer to fire after a given delay, assuming it's not already
323  * set for an earlier time.
324  */
325 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
326 {
327 	if (net->live) {
328 		afs_inc_servers_outstanding(net);
329 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
330 			afs_dec_servers_outstanding(net);
331 	}
332 }
333 
334 /*
335  * Server management timer.  We have an increment on fs_outstanding that we
336  * need to pass along to the work item.
337  */
338 void afs_servers_timer(struct timer_list *timer)
339 {
340 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
341 
342 	_enter("");
343 	if (!queue_work(afs_wq, &net->fs_manager))
344 		afs_dec_servers_outstanding(net);
345 }
346 
347 /*
348  * Release a reference on a server record.
349  */
350 void afs_put_server(struct afs_net *net, struct afs_server *server)
351 {
352 	unsigned int usage;
353 
354 	if (!server)
355 		return;
356 
357 	server->put_time = ktime_get_real_seconds();
358 
359 	usage = atomic_dec_return(&server->usage);
360 
361 	_enter("{%u}", usage);
362 
363 	if (likely(usage > 0))
364 		return;
365 
366 	afs_set_server_timer(net, afs_server_gc_delay);
367 }
368 
369 static void afs_server_rcu(struct rcu_head *rcu)
370 {
371 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
372 
373 	afs_put_addrlist(rcu_access_pointer(server->addresses));
374 	kfree(server);
375 }
376 
377 /*
378  * destroy a dead server
379  */
380 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
381 {
382 	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
383 	struct afs_addr_cursor ac = {
384 		.alist	= alist,
385 		.start	= alist->index,
386 		.index	= 0,
387 		.addr	= &alist->addrs[alist->index],
388 		.error	= 0,
389 	};
390 	_enter("%p", server);
391 
392 	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
393 		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
394 
395 	call_rcu(&server->rcu, afs_server_rcu);
396 	afs_dec_servers_outstanding(net);
397 }
398 
399 /*
400  * Garbage collect any expired servers.
401  */
402 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
403 {
404 	struct afs_server *server;
405 	bool deleted;
406 	int usage;
407 
408 	while ((server = gc_list)) {
409 		gc_list = server->gc_next;
410 
411 		write_seqlock(&net->fs_lock);
412 		usage = 1;
413 		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
414 		if (deleted) {
415 			rb_erase(&server->uuid_rb, &net->fs_servers);
416 			hlist_del_rcu(&server->proc_link);
417 		}
418 		write_sequnlock(&net->fs_lock);
419 
420 		if (deleted) {
421 			write_seqlock(&net->fs_addr_lock);
422 			if (!hlist_unhashed(&server->addr4_link))
423 				hlist_del_rcu(&server->addr4_link);
424 			if (!hlist_unhashed(&server->addr6_link))
425 				hlist_del_rcu(&server->addr6_link);
426 			write_sequnlock(&net->fs_addr_lock);
427 			afs_destroy_server(net, server);
428 		}
429 	}
430 }
431 
432 /*
433  * Manage the records of servers known to be within a network namespace.  This
434  * includes garbage collecting unused servers.
435  *
436  * Note also that we were given an increment on net->servers_outstanding by
437  * whoever queued us that we need to deal with before returning.
438  */
439 void afs_manage_servers(struct work_struct *work)
440 {
441 	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
442 	struct afs_server *gc_list = NULL;
443 	struct rb_node *cursor;
444 	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
445 	bool purging = !net->live;
446 
447 	_enter("");
448 
449 	/* Trawl the server list looking for servers that have expired from
450 	 * lack of use.
451 	 */
452 	read_seqlock_excl(&net->fs_lock);
453 
454 	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
455 		struct afs_server *server =
456 			rb_entry(cursor, struct afs_server, uuid_rb);
457 		int usage = atomic_read(&server->usage);
458 
459 		_debug("manage %pU %u", &server->uuid, usage);
460 
461 		ASSERTCMP(usage, >=, 1);
462 		ASSERTIFCMP(purging, usage, ==, 1);
463 
464 		if (usage == 1) {
465 			time64_t expire_at = server->put_time;
466 
467 			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
468 			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
469 				expire_at += afs_server_gc_delay;
470 			if (purging || expire_at <= now) {
471 				server->gc_next = gc_list;
472 				gc_list = server;
473 			} else if (expire_at < next_manage) {
474 				next_manage = expire_at;
475 			}
476 		}
477 	}
478 
479 	read_sequnlock_excl(&net->fs_lock);
480 
481 	/* Update the timer on the way out.  We have to pass an increment on
482 	 * servers_outstanding in the namespace that we are in to the timer or
483 	 * the work scheduler.
484 	 */
485 	if (!purging && next_manage < TIME64_MAX) {
486 		now = ktime_get_real_seconds();
487 
488 		if (next_manage - now <= 0) {
489 			if (queue_work(afs_wq, &net->fs_manager))
490 				afs_inc_servers_outstanding(net);
491 		} else {
492 			afs_set_server_timer(net, next_manage - now);
493 		}
494 	}
495 
496 	afs_gc_servers(net, gc_list);
497 
498 	afs_dec_servers_outstanding(net);
499 	_leave(" [%d]", atomic_read(&net->servers_outstanding));
500 }
501 
502 static void afs_queue_server_manager(struct afs_net *net)
503 {
504 	afs_inc_servers_outstanding(net);
505 	if (!queue_work(afs_wq, &net->fs_manager))
506 		afs_dec_servers_outstanding(net);
507 }
508 
509 /*
510  * Purge list of servers.
511  */
512 void afs_purge_servers(struct afs_net *net)
513 {
514 	_enter("");
515 
516 	if (del_timer_sync(&net->fs_timer))
517 		atomic_dec(&net->servers_outstanding);
518 
519 	afs_queue_server_manager(net);
520 
521 	_debug("wait");
522 	wait_var_event(&net->servers_outstanding,
523 		       !atomic_read(&net->servers_outstanding));
524 	_leave("");
525 }
526 
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in @fc is reset to begin at the address list's current
 * index and each address it offers is sent a capabilities request.  The first
 * request to complete without error marks the server as probed.
 *
 * Returns true if the server was probed; false otherwise, with the cursor's
 * error left in fc->ac.error.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;

	_enter("");

	/* Restart the address iteration from the list's current index. */
	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->addr = NULL;
	ac->begun = false;
	ac->error = 0;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);

		if (ac->error == 0) {
			afs_end_cursor(ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		}

		switch (ac->error) {
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* No answer from this address; try the next one. */
			continue;
		case -ENOMEM:
		case -ENONET:
			goto failed;
		case -ECONNABORTED:
			/* Translate the abort code into an error. */
			ac->error = afs_abort_to_error(ac->abort_code);
			goto failed;
		default:
			ac->error = -EIO;
			goto failed;
		}
	}

failed:
	afs_end_cursor(ac);
	return false;
}
572 
573 /*
574  * If we haven't already, try probing the fileserver to get its capabilities.
575  * We try not to instigate parallel probes, but it's possible that the parallel
576  * probes will fail due to authentication failure when ours would succeed.
577  *
578  * TODO: Try sending an anonymous probe if an authenticated probe fails.
579  */
580 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
581 {
582 	bool success;
583 	int ret, retries = 0;
584 
585 	_enter("");
586 
587 retry:
588 	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
589 		_leave(" = t");
590 		return true;
591 	}
592 
593 	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
594 		success = afs_do_probe_fileserver(fc);
595 		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
596 		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
597 		_leave(" = t");
598 		return success;
599 	}
600 
601 	_debug("wait");
602 	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
603 			  TASK_INTERRUPTIBLE);
604 	if (ret == -ERESTARTSYS) {
605 		fc->ac.error = ret;
606 		_leave(" = f [%d]", ret);
607 		return false;
608 	}
609 
610 	retries++;
611 	if (retries == 4) {
612 		fc->ac.error = -ESTALE;
613 		_leave(" = f [stale]");
614 		return false;
615 	}
616 	_debug("retry");
617 	goto retry;
618 }
619 
620 /*
621  * Get an update for a server's address list.
622  */
623 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
624 {
625 	struct afs_addr_list *alist, *discard;
626 
627 	_enter("");
628 
629 	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
630 				    &server->uuid);
631 	if (IS_ERR(alist)) {
632 		fc->ac.error = PTR_ERR(alist);
633 		_leave(" = f [%d]", fc->ac.error);
634 		return false;
635 	}
636 
637 	discard = alist;
638 	if (server->addr_version != alist->version) {
639 		write_lock(&server->fs_lock);
640 		discard = rcu_dereference_protected(server->addresses,
641 						    lockdep_is_held(&server->fs_lock));
642 		rcu_assign_pointer(server->addresses, alist);
643 		server->addr_version = alist->version;
644 		write_unlock(&server->fs_lock);
645 	}
646 
647 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
648 	afs_put_addrlist(discard);
649 	_leave(" = t");
650 	return true;
651 }
652 
653 /*
654  * See if a server's address list needs updating.
655  */
656 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
657 {
658 	time64_t now = ktime_get_real_seconds();
659 	long diff;
660 	bool success;
661 	int ret, retries = 0;
662 
663 	_enter("");
664 
665 	ASSERT(server);
666 
667 retry:
668 	diff = READ_ONCE(server->update_at) - now;
669 	if (diff > 0) {
670 		_leave(" = t [not now %ld]", diff);
671 		return true;
672 	}
673 
674 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
675 		success = afs_update_server_record(fc, server);
676 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
677 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
678 		_leave(" = %d", success);
679 		return success;
680 	}
681 
682 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
683 			  TASK_INTERRUPTIBLE);
684 	if (ret == -ERESTARTSYS) {
685 		fc->ac.error = ret;
686 		_leave(" = f [intr]");
687 		return false;
688 	}
689 
690 	retries++;
691 	if (retries == 4) {
692 		_leave(" = f [stale]");
693 		ret = -ESTALE;
694 		return false;
695 	}
696 	goto retry;
697 }
698