xref: /openbmc/linux/fs/afs/server.c (revision 4cff79e9)
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16 
/* Tunables for server record management; both are expressed in seconds. */
static unsigned int afs_server_gc_delay = 10;		/* Server record timeout */
static unsigned int afs_server_update_delay = 30;	/* Time till VLDB recheck */
19 
20 static void afs_inc_servers_outstanding(struct afs_net *net)
21 {
22 	atomic_inc(&net->servers_outstanding);
23 }
24 
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27 	if (atomic_dec_and_test(&net->servers_outstanding))
28 		wake_up_var(&net->servers_outstanding);
29 }
30 
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35 				   const struct sockaddr_rxrpc *srx)
36 {
37 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38 	const struct afs_addr_list *alist;
39 	struct afs_server *server = NULL;
40 	unsigned int i;
41 	bool ipv6 = true;
42 	int seq = 0, diff;
43 
44 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47 		ipv6 = false;
48 
49 	rcu_read_lock();
50 
51 	do {
52 		if (server)
53 			afs_put_server(net, server);
54 		server = NULL;
55 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56 
57 		if (ipv6) {
58 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59 				alist = rcu_dereference(server->addresses);
60 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61 					b = &alist->addrs[i].transport.sin6;
62 					diff = ((u16 __force)a->sin6_port -
63 						(u16 __force)b->sin6_port);
64 					if (diff == 0)
65 						diff = memcmp(&a->sin6_addr,
66 							      &b->sin6_addr,
67 							      sizeof(struct in6_addr));
68 					if (diff == 0)
69 						goto found;
70 					if (diff < 0) {
71 						// TODO: Sort the list
72 						//if (i == alist->nr_ipv4)
73 						//	goto not_found;
74 						break;
75 					}
76 				}
77 			}
78 		} else {
79 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
80 				alist = rcu_dereference(server->addresses);
81 				for (i = 0; i < alist->nr_ipv4; i++) {
82 					b = &alist->addrs[i].transport.sin6;
83 					diff = ((u16 __force)a->sin6_port -
84 						(u16 __force)b->sin6_port);
85 					if (diff == 0)
86 						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
87 							(u32 __force)b->sin6_addr.s6_addr32[3]);
88 					if (diff == 0)
89 						goto found;
90 					if (diff < 0) {
91 						// TODO: Sort the list
92 						//if (i == 0)
93 						//	goto not_found;
94 						break;
95 					}
96 				}
97 			}
98 		}
99 
100 	//not_found:
101 		server = NULL;
102 	found:
103 		if (server && !atomic_inc_not_zero(&server->usage))
104 			server = NULL;
105 
106 	} while (need_seqretry(&net->fs_addr_lock, seq));
107 
108 	done_seqretry(&net->fs_addr_lock, seq);
109 
110 	rcu_read_unlock();
111 	return server;
112 }
113 
114 /*
115  * Look up a server by its UUID
116  */
117 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
118 {
119 	struct afs_server *server = NULL;
120 	struct rb_node *p;
121 	int diff, seq = 0;
122 
123 	_enter("%pU", uuid);
124 
125 	do {
126 		/* Unfortunately, rbtree walking doesn't give reliable results
127 		 * under just the RCU read lock, so we have to check for
128 		 * changes.
129 		 */
130 		if (server)
131 			afs_put_server(net, server);
132 		server = NULL;
133 
134 		read_seqbegin_or_lock(&net->fs_lock, &seq);
135 
136 		p = net->fs_servers.rb_node;
137 		while (p) {
138 			server = rb_entry(p, struct afs_server, uuid_rb);
139 
140 			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
141 			if (diff < 0) {
142 				p = p->rb_left;
143 			} else if (diff > 0) {
144 				p = p->rb_right;
145 			} else {
146 				afs_get_server(server);
147 				break;
148 			}
149 
150 			server = NULL;
151 		}
152 	} while (need_seqretry(&net->fs_lock, seq));
153 
154 	done_seqretry(&net->fs_lock, seq);
155 
156 	_leave(" = %p", server);
157 	return server;
158 }
159 
160 /*
161  * Install a server record in the namespace tree
162  */
163 static struct afs_server *afs_install_server(struct afs_net *net,
164 					     struct afs_server *candidate)
165 {
166 	const struct afs_addr_list *alist;
167 	struct afs_server *server;
168 	struct rb_node **pp, *p;
169 	int ret = -EEXIST, diff;
170 
171 	_enter("%p", candidate);
172 
173 	write_seqlock(&net->fs_lock);
174 
175 	/* Firstly install the server in the UUID lookup tree */
176 	pp = &net->fs_servers.rb_node;
177 	p = NULL;
178 	while (*pp) {
179 		p = *pp;
180 		_debug("- consider %p", p);
181 		server = rb_entry(p, struct afs_server, uuid_rb);
182 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
183 		if (diff < 0)
184 			pp = &(*pp)->rb_left;
185 		else if (diff > 0)
186 			pp = &(*pp)->rb_right;
187 		else
188 			goto exists;
189 	}
190 
191 	server = candidate;
192 	rb_link_node(&server->uuid_rb, p, pp);
193 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
194 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
195 
196 	write_seqlock(&net->fs_addr_lock);
197 	alist = rcu_dereference_protected(server->addresses,
198 					  lockdep_is_held(&net->fs_addr_lock.lock));
199 
200 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
201 	 * it in the IPv4 and/or IPv6 reverse-map lists.
202 	 *
203 	 * TODO: For speed we want to use something other than a flat list
204 	 * here; even sorting the list in terms of lowest address would help a
205 	 * bit, but anything we might want to do gets messy and memory
206 	 * intensive.
207 	 */
208 	if (alist->nr_ipv4 > 0)
209 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
210 	if (alist->nr_addrs > alist->nr_ipv4)
211 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
212 
213 	write_sequnlock(&net->fs_addr_lock);
214 	ret = 0;
215 
216 exists:
217 	afs_get_server(server);
218 	write_sequnlock(&net->fs_lock);
219 	return server;
220 }
221 
222 /*
223  * allocate a new server record
224  */
225 static struct afs_server *afs_alloc_server(struct afs_net *net,
226 					   const uuid_t *uuid,
227 					   struct afs_addr_list *alist)
228 {
229 	struct afs_server *server;
230 
231 	_enter("");
232 
233 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
234 	if (!server)
235 		goto enomem;
236 
237 	atomic_set(&server->usage, 1);
238 	RCU_INIT_POINTER(server->addresses, alist);
239 	server->addr_version = alist->version;
240 	server->uuid = *uuid;
241 	server->flags = (1UL << AFS_SERVER_FL_NEW);
242 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
243 	rwlock_init(&server->fs_lock);
244 	INIT_LIST_HEAD(&server->cb_interests);
245 	rwlock_init(&server->cb_break_lock);
246 
247 	afs_inc_servers_outstanding(net);
248 	_leave(" = %p", server);
249 	return server;
250 
251 enomem:
252 	_leave(" = NULL [nomem]");
253 	return NULL;
254 }
255 
/*
 * Look up an address record for a server.
 *
 * Each address offered by the cell's VL cursor is tried in turn, using the
 * YFS endpoint query where the address's bit is set in the cursor's yfs mask
 * and the AFS query otherwise.  Unreachable/refused addresses are skipped;
 * any other failure ends the search.
 *
 * Returns the address list on success, otherwise an error pointer built from
 * the result of afs_end_cursor().
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		if (test_bit(ac.index, &ac.alist->yfs))
			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
		else
			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		switch (ac.error) {
		case 0:
			afs_end_cursor(&ac);
			return alist;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* Couldn't reach this one; try the next address. */
			continue;

		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			ac.error = -EIO;
			break;
		}

		/* Anything that gets here is fatal to the lookup. */
		break;
	}

	return ERR_PTR(afs_end_cursor(&ac));
}
298 
299 /*
300  * Get or create a fileserver record.
301  */
302 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
303 				     const uuid_t *uuid)
304 {
305 	struct afs_addr_list *alist;
306 	struct afs_server *server, *candidate;
307 
308 	_enter("%p,%pU", cell->net, uuid);
309 
310 	server = afs_find_server_by_uuid(cell->net, uuid);
311 	if (server)
312 		return server;
313 
314 	alist = afs_vl_lookup_addrs(cell, key, uuid);
315 	if (IS_ERR(alist))
316 		return ERR_CAST(alist);
317 
318 	candidate = afs_alloc_server(cell->net, uuid, alist);
319 	if (!candidate) {
320 		afs_put_addrlist(alist);
321 		return ERR_PTR(-ENOMEM);
322 	}
323 
324 	server = afs_install_server(cell->net, candidate);
325 	if (server != candidate) {
326 		afs_put_addrlist(alist);
327 		kfree(candidate);
328 	}
329 
330 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
331 	return server;
332 }
333 
334 /*
335  * Set the server timer to fire after a given delay, assuming it's not already
336  * set for an earlier time.
337  */
338 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
339 {
340 	if (net->live) {
341 		afs_inc_servers_outstanding(net);
342 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
343 			afs_dec_servers_outstanding(net);
344 	}
345 }
346 
347 /*
348  * Server management timer.  We have an increment on fs_outstanding that we
349  * need to pass along to the work item.
350  */
351 void afs_servers_timer(struct timer_list *timer)
352 {
353 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
354 
355 	_enter("");
356 	if (!queue_work(afs_wq, &net->fs_manager))
357 		afs_dec_servers_outstanding(net);
358 }
359 
360 /*
361  * Release a reference on a server record.
362  */
363 void afs_put_server(struct afs_net *net, struct afs_server *server)
364 {
365 	unsigned int usage;
366 
367 	if (!server)
368 		return;
369 
370 	server->put_time = ktime_get_real_seconds();
371 
372 	usage = atomic_dec_return(&server->usage);
373 
374 	_enter("{%u}", usage);
375 
376 	if (likely(usage > 0))
377 		return;
378 
379 	afs_set_server_timer(net, afs_server_gc_delay);
380 }
381 
382 static void afs_server_rcu(struct rcu_head *rcu)
383 {
384 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
385 
386 	afs_put_addrlist(rcu_access_pointer(server->addresses));
387 	kfree(server);
388 }
389 
390 /*
391  * destroy a dead server
392  */
393 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
394 {
395 	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
396 	struct afs_addr_cursor ac = {
397 		.alist	= alist,
398 		.addr	= &alist->addrs[0],
399 		.start	= alist->index,
400 		.index	= alist->index,
401 		.error	= 0,
402 	};
403 	_enter("%p", server);
404 
405 	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
406 	call_rcu(&server->rcu, afs_server_rcu);
407 	afs_dec_servers_outstanding(net);
408 }
409 
410 /*
411  * Garbage collect any expired servers.
412  */
413 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
414 {
415 	struct afs_server *server;
416 	bool deleted;
417 	int usage;
418 
419 	while ((server = gc_list)) {
420 		gc_list = server->gc_next;
421 
422 		write_seqlock(&net->fs_lock);
423 		usage = 1;
424 		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
425 		if (deleted) {
426 			rb_erase(&server->uuid_rb, &net->fs_servers);
427 			hlist_del_rcu(&server->proc_link);
428 		}
429 		write_sequnlock(&net->fs_lock);
430 
431 		if (deleted) {
432 			write_seqlock(&net->fs_addr_lock);
433 			if (!hlist_unhashed(&server->addr4_link))
434 				hlist_del_rcu(&server->addr4_link);
435 			if (!hlist_unhashed(&server->addr6_link))
436 				hlist_del_rcu(&server->addr6_link);
437 			write_sequnlock(&net->fs_addr_lock);
438 			afs_destroy_server(net, server);
439 		}
440 	}
441 }
442 
443 /*
444  * Manage the records of servers known to be within a network namespace.  This
445  * includes garbage collecting unused servers.
446  *
447  * Note also that we were given an increment on net->servers_outstanding by
448  * whoever queued us that we need to deal with before returning.
449  */
450 void afs_manage_servers(struct work_struct *work)
451 {
452 	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
453 	struct afs_server *gc_list = NULL;
454 	struct rb_node *cursor;
455 	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
456 	bool purging = !net->live;
457 
458 	_enter("");
459 
460 	/* Trawl the server list looking for servers that have expired from
461 	 * lack of use.
462 	 */
463 	read_seqlock_excl(&net->fs_lock);
464 
465 	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
466 		struct afs_server *server =
467 			rb_entry(cursor, struct afs_server, uuid_rb);
468 		int usage = atomic_read(&server->usage);
469 
470 		_debug("manage %pU %u", &server->uuid, usage);
471 
472 		ASSERTCMP(usage, >=, 1);
473 		ASSERTIFCMP(purging, usage, ==, 1);
474 
475 		if (usage == 1) {
476 			time64_t expire_at = server->put_time;
477 
478 			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
479 			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
480 				expire_at += afs_server_gc_delay;
481 			if (purging || expire_at <= now) {
482 				server->gc_next = gc_list;
483 				gc_list = server;
484 			} else if (expire_at < next_manage) {
485 				next_manage = expire_at;
486 			}
487 		}
488 	}
489 
490 	read_sequnlock_excl(&net->fs_lock);
491 
492 	/* Update the timer on the way out.  We have to pass an increment on
493 	 * servers_outstanding in the namespace that we are in to the timer or
494 	 * the work scheduler.
495 	 */
496 	if (!purging && next_manage < TIME64_MAX) {
497 		now = ktime_get_real_seconds();
498 
499 		if (next_manage - now <= 0) {
500 			if (queue_work(afs_wq, &net->fs_manager))
501 				afs_inc_servers_outstanding(net);
502 		} else {
503 			afs_set_server_timer(net, next_manage - now);
504 		}
505 	}
506 
507 	afs_gc_servers(net, gc_list);
508 
509 	afs_dec_servers_outstanding(net);
510 	_leave(" [%d]", atomic_read(&net->servers_outstanding));
511 }
512 
513 static void afs_queue_server_manager(struct afs_net *net)
514 {
515 	afs_inc_servers_outstanding(net);
516 	if (!queue_work(afs_wq, &net->fs_manager))
517 		afs_dec_servers_outstanding(net);
518 }
519 
520 /*
521  * Purge list of servers.
522  */
523 void afs_purge_servers(struct afs_net *net)
524 {
525 	_enter("");
526 
527 	if (del_timer_sync(&net->fs_timer))
528 		atomic_dec(&net->servers_outstanding);
529 
530 	afs_queue_server_manager(net);
531 
532 	_debug("wait");
533 	wait_var_event(&net->servers_outstanding,
534 		       !atomic_read(&net->servers_outstanding));
535 	_leave("");
536 }
537 
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor is reset to start from the address list's current index
 * and each address is tried in turn.  Unreachable, refused or timed-out
 * addresses are skipped; any other failure ends the probe.
 *
 * Returns true and marks the server as probed on success, false otherwise
 * (with the reason left in fc->ac.error).
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	_enter("");

	/* Start the address cursor afresh. */
	fc->ac.start = READ_ONCE(fc->ac.alist->index);
	fc->ac.index = fc->ac.start;
	fc->ac.addr = NULL;
	fc->ac.error = 0;
	fc->ac.begun = false;

	while (afs_iterate_addresses(&fc->ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					&fc->ac, fc->key);

		switch (fc->ac.error) {
		case 0:
			afs_end_cursor(&fc->ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* No answer from this address; try the next one. */
			continue;

		case -ECONNABORTED:
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			fc->ac.error = -EIO;
			break;
		}

		/* Anything that gets here is fatal to the probe. */
		break;
	}

	afs_end_cursor(&fc->ac);
	return false;
}
583 
584 /*
585  * If we haven't already, try probing the fileserver to get its capabilities.
586  * We try not to instigate parallel probes, but it's possible that the parallel
587  * probes will fail due to authentication failure when ours would succeed.
588  *
589  * TODO: Try sending an anonymous probe if an authenticated probe fails.
590  */
591 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
592 {
593 	bool success;
594 	int ret, retries = 0;
595 
596 	_enter("");
597 
598 retry:
599 	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
600 		_leave(" = t");
601 		return true;
602 	}
603 
604 	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
605 		success = afs_do_probe_fileserver(fc);
606 		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
607 		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
608 		_leave(" = t");
609 		return success;
610 	}
611 
612 	_debug("wait");
613 	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
614 			  TASK_INTERRUPTIBLE);
615 	if (ret == -ERESTARTSYS) {
616 		fc->ac.error = ret;
617 		_leave(" = f [%d]", ret);
618 		return false;
619 	}
620 
621 	retries++;
622 	if (retries == 4) {
623 		fc->ac.error = -ESTALE;
624 		_leave(" = f [stale]");
625 		return false;
626 	}
627 	_debug("retry");
628 	goto retry;
629 }
630 
631 /*
632  * Get an update for a server's address list.
633  */
634 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
635 {
636 	struct afs_addr_list *alist, *discard;
637 
638 	_enter("");
639 
640 	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
641 				    &server->uuid);
642 	if (IS_ERR(alist)) {
643 		fc->ac.error = PTR_ERR(alist);
644 		_leave(" = f [%d]", fc->ac.error);
645 		return false;
646 	}
647 
648 	discard = alist;
649 	if (server->addr_version != alist->version) {
650 		write_lock(&server->fs_lock);
651 		discard = rcu_dereference_protected(server->addresses,
652 						    lockdep_is_held(&server->fs_lock));
653 		rcu_assign_pointer(server->addresses, alist);
654 		server->addr_version = alist->version;
655 		write_unlock(&server->fs_lock);
656 	}
657 
658 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
659 	afs_put_addrlist(discard);
660 	_leave(" = t");
661 	return true;
662 }
663 
664 /*
665  * See if a server's address list needs updating.
666  */
667 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
668 {
669 	time64_t now = ktime_get_real_seconds();
670 	long diff;
671 	bool success;
672 	int ret, retries = 0;
673 
674 	_enter("");
675 
676 	ASSERT(server);
677 
678 retry:
679 	diff = READ_ONCE(server->update_at) - now;
680 	if (diff > 0) {
681 		_leave(" = t [not now %ld]", diff);
682 		return true;
683 	}
684 
685 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
686 		success = afs_update_server_record(fc, server);
687 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
688 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
689 		_leave(" = %d", success);
690 		return success;
691 	}
692 
693 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
694 			  TASK_INTERRUPTIBLE);
695 	if (ret == -ERESTARTSYS) {
696 		fc->ac.error = ret;
697 		_leave(" = f [intr]");
698 		return false;
699 	}
700 
701 	retries++;
702 	if (retries == 4) {
703 		_leave(" = f [stale]");
704 		ret = -ESTALE;
705 		return false;
706 	}
707 	goto retry;
708 }
709