xref: /openbmc/linux/fs/afs/server.c (revision 160b8e75)
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16 
/* Seconds an unused server record is kept before it may be garbage collected */
static unsigned int afs_server_gc_delay = 10;
/* Seconds to wait before a server's address list is looked up in the VLDB again */
static unsigned int afs_server_update_delay = 30;
19 
/*
 * Note that one more item is outstanding in this namespace.  Within this file
 * the count is taken for allocated server records, for an armed management
 * timer and for a queued management work item.
 */
static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}
24 
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27 	if (atomic_dec_and_test(&net->servers_outstanding))
28 		wake_up_atomic_t(&net->servers_outstanding);
29 }
30 
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35 				   const struct sockaddr_rxrpc *srx)
36 {
37 	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38 	const struct afs_addr_list *alist;
39 	struct afs_server *server = NULL;
40 	unsigned int i;
41 	bool ipv6 = true;
42 	int seq = 0, diff;
43 
44 	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45 	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46 	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47 		ipv6 = false;
48 
49 	rcu_read_lock();
50 
51 	do {
52 		if (server)
53 			afs_put_server(net, server);
54 		server = NULL;
55 		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56 
57 		if (ipv6) {
58 			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59 				alist = rcu_dereference(server->addresses);
60 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61 					b = &alist->addrs[i].transport.sin6;
62 					diff = (u16)a->sin6_port - (u16)b->sin6_port;
63 					if (diff == 0)
64 						diff = memcmp(&a->sin6_addr,
65 							      &b->sin6_addr,
66 							      sizeof(struct in6_addr));
67 					if (diff == 0)
68 						goto found;
69 					if (diff < 0) {
70 						// TODO: Sort the list
71 						//if (i == alist->nr_ipv4)
72 						//	goto not_found;
73 						break;
74 					}
75 				}
76 			}
77 		} else {
78 			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
79 				alist = rcu_dereference(server->addresses);
80 				for (i = 0; i < alist->nr_ipv4; i++) {
81 					b = &alist->addrs[i].transport.sin6;
82 					diff = (u16)a->sin6_port - (u16)b->sin6_port;
83 					if (diff == 0)
84 						diff = ((u32)a->sin6_addr.s6_addr32[3] -
85 							(u32)b->sin6_addr.s6_addr32[3]);
86 					if (diff == 0)
87 						goto found;
88 					if (diff < 0) {
89 						// TODO: Sort the list
90 						//if (i == 0)
91 						//	goto not_found;
92 						break;
93 					}
94 				}
95 			}
96 		}
97 
98 	//not_found:
99 		server = NULL;
100 	found:
101 		if (server && !atomic_inc_not_zero(&server->usage))
102 			server = NULL;
103 
104 	} while (need_seqretry(&net->fs_addr_lock, seq));
105 
106 	done_seqretry(&net->fs_addr_lock, seq);
107 
108 	rcu_read_unlock();
109 	return server;
110 }
111 
112 /*
113  * Look up a server by its UUID
114  */
115 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
116 {
117 	struct afs_server *server = NULL;
118 	struct rb_node *p;
119 	int diff, seq = 0;
120 
121 	_enter("%pU", uuid);
122 
123 	do {
124 		/* Unfortunately, rbtree walking doesn't give reliable results
125 		 * under just the RCU read lock, so we have to check for
126 		 * changes.
127 		 */
128 		if (server)
129 			afs_put_server(net, server);
130 		server = NULL;
131 
132 		read_seqbegin_or_lock(&net->fs_lock, &seq);
133 
134 		p = net->fs_servers.rb_node;
135 		while (p) {
136 			server = rb_entry(p, struct afs_server, uuid_rb);
137 
138 			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
139 			if (diff < 0) {
140 				p = p->rb_left;
141 			} else if (diff > 0) {
142 				p = p->rb_right;
143 			} else {
144 				afs_get_server(server);
145 				break;
146 			}
147 
148 			server = NULL;
149 		}
150 	} while (need_seqretry(&net->fs_lock, seq));
151 
152 	done_seqretry(&net->fs_lock, seq);
153 
154 	_leave(" = %p", server);
155 	return server;
156 }
157 
158 /*
159  * Install a server record in the namespace tree
160  */
161 static struct afs_server *afs_install_server(struct afs_net *net,
162 					     struct afs_server *candidate)
163 {
164 	const struct afs_addr_list *alist;
165 	struct afs_server *server;
166 	struct rb_node **pp, *p;
167 	int ret = -EEXIST, diff;
168 
169 	_enter("%p", candidate);
170 
171 	write_seqlock(&net->fs_lock);
172 
173 	/* Firstly install the server in the UUID lookup tree */
174 	pp = &net->fs_servers.rb_node;
175 	p = NULL;
176 	while (*pp) {
177 		p = *pp;
178 		_debug("- consider %p", p);
179 		server = rb_entry(p, struct afs_server, uuid_rb);
180 		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
181 		if (diff < 0)
182 			pp = &(*pp)->rb_left;
183 		else if (diff > 0)
184 			pp = &(*pp)->rb_right;
185 		else
186 			goto exists;
187 	}
188 
189 	server = candidate;
190 	rb_link_node(&server->uuid_rb, p, pp);
191 	rb_insert_color(&server->uuid_rb, &net->fs_servers);
192 	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
193 
194 	write_seqlock(&net->fs_addr_lock);
195 	alist = rcu_dereference_protected(server->addresses,
196 					  lockdep_is_held(&net->fs_addr_lock.lock));
197 
198 	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
199 	 * it in the IPv4 and/or IPv6 reverse-map lists.
200 	 *
201 	 * TODO: For speed we want to use something other than a flat list
202 	 * here; even sorting the list in terms of lowest address would help a
203 	 * bit, but anything we might want to do gets messy and memory
204 	 * intensive.
205 	 */
206 	if (alist->nr_ipv4 > 0)
207 		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
208 	if (alist->nr_addrs > alist->nr_ipv4)
209 		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
210 
211 	write_sequnlock(&net->fs_addr_lock);
212 	ret = 0;
213 
214 exists:
215 	afs_get_server(server);
216 	write_sequnlock(&net->fs_lock);
217 	return server;
218 }
219 
220 /*
221  * allocate a new server record
222  */
223 static struct afs_server *afs_alloc_server(struct afs_net *net,
224 					   const uuid_t *uuid,
225 					   struct afs_addr_list *alist)
226 {
227 	struct afs_server *server;
228 
229 	_enter("");
230 
231 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
232 	if (!server)
233 		goto enomem;
234 
235 	atomic_set(&server->usage, 1);
236 	RCU_INIT_POINTER(server->addresses, alist);
237 	server->addr_version = alist->version;
238 	server->uuid = *uuid;
239 	server->flags = (1UL << AFS_SERVER_FL_NEW);
240 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
241 	rwlock_init(&server->fs_lock);
242 	INIT_LIST_HEAD(&server->cb_interests);
243 	rwlock_init(&server->cb_break_lock);
244 
245 	afs_inc_servers_outstanding(net);
246 	_leave(" = %p", server);
247 	return server;
248 
249 enomem:
250 	_leave(" = NULL [nomem]");
251 	return NULL;
252 }
253 
/*
 * Look up the address list for a server by asking the cell's VL servers.
 *
 * Each VL address offered by the cursor is tried in turn.  Network
 * unreachable, host unreachable and connection refused errors move on to the
 * next address; any other error ends the search.
 *
 * Returns the address list on success, or an error pointer built from the
 * result of afs_end_cursor() (or from afs_set_vl_cursor() if that failed).
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		/* Pick the RPC according to the per-address yfs bit */
		alist = test_bit(ac.index, &ac.alist->yfs) ?
			afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid) :
			afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		if (ac.error == 0) {
			afs_end_cursor(&ac);
			return alist;
		}

		/* Errors that merely disqualify this address */
		if (ac.error == -ENETUNREACH ||
		    ac.error == -EHOSTUNREACH ||
		    ac.error == -ECONNREFUSED)
			continue;

		/* Everything else is fatal to the lookup */
		if (ac.error == -ECONNABORTED)
			ac.error = afs_abort_to_error(ac.abort_code);
		else if (ac.error != -ENOMEM && ac.error != -ENONET)
			ac.error = -EIO;
		break;
	}

	return ERR_PTR(afs_end_cursor(&ac));
}
296 
297 /*
298  * Get or create a fileserver record.
299  */
300 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
301 				     const uuid_t *uuid)
302 {
303 	struct afs_addr_list *alist;
304 	struct afs_server *server, *candidate;
305 
306 	_enter("%p,%pU", cell->net, uuid);
307 
308 	server = afs_find_server_by_uuid(cell->net, uuid);
309 	if (server)
310 		return server;
311 
312 	alist = afs_vl_lookup_addrs(cell, key, uuid);
313 	if (IS_ERR(alist))
314 		return ERR_CAST(alist);
315 
316 	candidate = afs_alloc_server(cell->net, uuid, alist);
317 	if (!candidate) {
318 		afs_put_addrlist(alist);
319 		return ERR_PTR(-ENOMEM);
320 	}
321 
322 	server = afs_install_server(cell->net, candidate);
323 	if (server != candidate) {
324 		afs_put_addrlist(alist);
325 		kfree(candidate);
326 	}
327 
328 	_leave(" = %p{%d}", server, atomic_read(&server->usage));
329 	return server;
330 }
331 
332 /*
333  * Set the server timer to fire after a given delay, assuming it's not already
334  * set for an earlier time.
335  */
336 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
337 {
338 	if (net->live) {
339 		afs_inc_servers_outstanding(net);
340 		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
341 			afs_dec_servers_outstanding(net);
342 	}
343 }
344 
345 /*
346  * Server management timer.  We have an increment on fs_outstanding that we
347  * need to pass along to the work item.
348  */
349 void afs_servers_timer(struct timer_list *timer)
350 {
351 	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
352 
353 	_enter("");
354 	if (!queue_work(afs_wq, &net->fs_manager))
355 		afs_dec_servers_outstanding(net);
356 }
357 
358 /*
359  * Release a reference on a server record.
360  */
361 void afs_put_server(struct afs_net *net, struct afs_server *server)
362 {
363 	unsigned int usage;
364 
365 	if (!server)
366 		return;
367 
368 	server->put_time = ktime_get_real_seconds();
369 
370 	usage = atomic_dec_return(&server->usage);
371 
372 	_enter("{%u}", usage);
373 
374 	if (likely(usage > 0))
375 		return;
376 
377 	afs_set_server_timer(net, afs_server_gc_delay);
378 }
379 
380 static void afs_server_rcu(struct rcu_head *rcu)
381 {
382 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
383 
384 	afs_put_addrlist(server->addresses);
385 	kfree(server);
386 }
387 
388 /*
389  * destroy a dead server
390  */
391 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
392 {
393 	struct afs_addr_list *alist = server->addresses;
394 	struct afs_addr_cursor ac = {
395 		.alist	= alist,
396 		.addr	= &alist->addrs[0],
397 		.start	= alist->index,
398 		.index	= alist->index,
399 		.error	= 0,
400 	};
401 	_enter("%p", server);
402 
403 	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
404 	call_rcu(&server->rcu, afs_server_rcu);
405 	afs_dec_servers_outstanding(net);
406 }
407 
408 /*
409  * Garbage collect any expired servers.
410  */
411 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
412 {
413 	struct afs_server *server;
414 	bool deleted;
415 	int usage;
416 
417 	while ((server = gc_list)) {
418 		gc_list = server->gc_next;
419 
420 		write_seqlock(&net->fs_lock);
421 		usage = 1;
422 		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
423 		if (deleted) {
424 			rb_erase(&server->uuid_rb, &net->fs_servers);
425 			hlist_del_rcu(&server->proc_link);
426 		}
427 		write_sequnlock(&net->fs_lock);
428 
429 		if (deleted)
430 			afs_destroy_server(net, server);
431 	}
432 }
433 
434 /*
435  * Manage the records of servers known to be within a network namespace.  This
436  * includes garbage collecting unused servers.
437  *
438  * Note also that we were given an increment on net->servers_outstanding by
439  * whoever queued us that we need to deal with before returning.
440  */
441 void afs_manage_servers(struct work_struct *work)
442 {
443 	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
444 	struct afs_server *gc_list = NULL;
445 	struct rb_node *cursor;
446 	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
447 	bool purging = !net->live;
448 
449 	_enter("");
450 
451 	/* Trawl the server list looking for servers that have expired from
452 	 * lack of use.
453 	 */
454 	read_seqlock_excl(&net->fs_lock);
455 
456 	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
457 		struct afs_server *server =
458 			rb_entry(cursor, struct afs_server, uuid_rb);
459 		int usage = atomic_read(&server->usage);
460 
461 		_debug("manage %pU %u", &server->uuid, usage);
462 
463 		ASSERTCMP(usage, >=, 1);
464 		ASSERTIFCMP(purging, usage, ==, 1);
465 
466 		if (usage == 1) {
467 			time64_t expire_at = server->put_time;
468 
469 			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
470 			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
471 				expire_at += afs_server_gc_delay;
472 			if (purging || expire_at <= now) {
473 				server->gc_next = gc_list;
474 				gc_list = server;
475 			} else if (expire_at < next_manage) {
476 				next_manage = expire_at;
477 			}
478 		}
479 	}
480 
481 	read_sequnlock_excl(&net->fs_lock);
482 
483 	/* Update the timer on the way out.  We have to pass an increment on
484 	 * servers_outstanding in the namespace that we are in to the timer or
485 	 * the work scheduler.
486 	 */
487 	if (!purging && next_manage < TIME64_MAX) {
488 		now = ktime_get_real_seconds();
489 
490 		if (next_manage - now <= 0) {
491 			if (queue_work(afs_wq, &net->fs_manager))
492 				afs_inc_servers_outstanding(net);
493 		} else {
494 			afs_set_server_timer(net, next_manage - now);
495 		}
496 	}
497 
498 	afs_gc_servers(net, gc_list);
499 
500 	afs_dec_servers_outstanding(net);
501 	_leave(" [%d]", atomic_read(&net->servers_outstanding));
502 }
503 
504 static void afs_queue_server_manager(struct afs_net *net)
505 {
506 	afs_inc_servers_outstanding(net);
507 	if (!queue_work(afs_wq, &net->fs_manager))
508 		afs_dec_servers_outstanding(net);
509 }
510 
511 /*
512  * Purge list of servers.
513  */
514 void afs_purge_servers(struct afs_net *net)
515 {
516 	_enter("");
517 
518 	if (del_timer_sync(&net->fs_timer))
519 		atomic_dec(&net->servers_outstanding);
520 
521 	afs_queue_server_manager(net);
522 
523 	_debug("wait");
524 	wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
525 			 TASK_UNINTERRUPTIBLE);
526 	_leave("");
527 }
528 
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in @fc is reset and each of the server's addresses is
 * then tried in turn.  Unreachable, refused and timed-out errors move on to
 * the next address; any other error stops the probe.  On success the server
 * is flagged as probed.
 *
 * Returns true if a probe succeeded, false otherwise.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	int err;

	_enter("");

	/* Start the cursor afresh from the address list's current index */
	fc->ac.start = READ_ONCE(fc->ac.alist->index);
	fc->ac.index = fc->ac.start;
	fc->ac.addr = NULL;
	fc->ac.error = 0;
	fc->ac.begun = false;

	while (afs_iterate_addresses(&fc->ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					&fc->ac, fc->key);
		err = fc->ac.error;

		if (err == 0) {
			afs_end_cursor(&fc->ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		}

		/* Errors that merely disqualify this address */
		if (err == -ENETUNREACH ||
		    err == -EHOSTUNREACH ||
		    err == -ECONNREFUSED ||
		    err == -ETIMEDOUT ||
		    err == -ETIME)
			continue;

		/* Everything else is fatal to the probe */
		if (err == -ECONNABORTED)
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
		else if (err != -ENOMEM && err != -ENONET)
			fc->ac.error = -EIO;
		break;
	}

	afs_end_cursor(&fc->ac);
	return false;
}
574 
575 /*
576  * If we haven't already, try probing the fileserver to get its capabilities.
577  * We try not to instigate parallel probes, but it's possible that the parallel
578  * probes will fail due to authentication failure when ours would succeed.
579  *
580  * TODO: Try sending an anonymous probe if an authenticated probe fails.
581  */
582 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
583 {
584 	bool success;
585 	int ret, retries = 0;
586 
587 	_enter("");
588 
589 retry:
590 	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
591 		_leave(" = t");
592 		return true;
593 	}
594 
595 	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
596 		success = afs_do_probe_fileserver(fc);
597 		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
598 		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
599 		_leave(" = t");
600 		return success;
601 	}
602 
603 	_debug("wait");
604 	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
605 			  TASK_INTERRUPTIBLE);
606 	if (ret == -ERESTARTSYS) {
607 		fc->ac.error = ret;
608 		_leave(" = f [%d]", ret);
609 		return false;
610 	}
611 
612 	retries++;
613 	if (retries == 4) {
614 		fc->ac.error = -ESTALE;
615 		_leave(" = f [stale]");
616 		return false;
617 	}
618 	_debug("retry");
619 	goto retry;
620 }
621 
622 /*
623  * Get an update for a server's address list.
624  */
625 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
626 {
627 	struct afs_addr_list *alist, *discard;
628 
629 	_enter("");
630 
631 	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
632 				    &server->uuid);
633 	if (IS_ERR(alist)) {
634 		fc->ac.error = PTR_ERR(alist);
635 		_leave(" = f [%d]", fc->ac.error);
636 		return false;
637 	}
638 
639 	discard = alist;
640 	if (server->addr_version != alist->version) {
641 		write_lock(&server->fs_lock);
642 		discard = rcu_dereference_protected(server->addresses,
643 						    lockdep_is_held(&server->fs_lock));
644 		rcu_assign_pointer(server->addresses, alist);
645 		server->addr_version = alist->version;
646 		write_unlock(&server->fs_lock);
647 	}
648 
649 	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
650 	afs_put_addrlist(discard);
651 	_leave(" = t");
652 	return true;
653 }
654 
655 /*
656  * See if a server's address list needs updating.
657  */
658 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
659 {
660 	time64_t now = ktime_get_real_seconds();
661 	long diff;
662 	bool success;
663 	int ret, retries = 0;
664 
665 	_enter("");
666 
667 	ASSERT(server);
668 
669 retry:
670 	diff = READ_ONCE(server->update_at) - now;
671 	if (diff > 0) {
672 		_leave(" = t [not now %ld]", diff);
673 		return true;
674 	}
675 
676 	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
677 		success = afs_update_server_record(fc, server);
678 		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
679 		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
680 		_leave(" = %d", success);
681 		return success;
682 	}
683 
684 	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
685 			  TASK_INTERRUPTIBLE);
686 	if (ret == -ERESTARTSYS) {
687 		fc->ac.error = ret;
688 		_leave(" = f [intr]");
689 		return false;
690 	}
691 
692 	retries++;
693 	if (retries == 4) {
694 		_leave(" = f [stale]");
695 		ret = -ESTALE;
696 		return false;
697 	}
698 	goto retry;
699 }
700