xref: /openbmc/linux/fs/afs/fs_probe.c (revision 2fa5ebe3)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS fileserver probing
3  *
4  * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_afs.h"
13 #include "protocol_yfs.h"
14 
15 static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
16 static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
17 
18 /*
19  * Start the probe polling timer.  We have to supply it with an inc on the
20  * outstanding server count.
21  */
22 static void afs_schedule_fs_probe(struct afs_net *net,
23 				  struct afs_server *server, bool fast)
24 {
25 	unsigned long atj;
26 
27 	if (!net->live)
28 		return;
29 
30 	atj = server->probed_at;
31 	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
32 
33 	afs_inc_servers_outstanding(net);
34 	if (timer_reduce(&net->fs_probe_timer, atj))
35 		afs_dec_servers_outstanding(net);
36 }
37 
38 /*
39  * Handle the completion of a set of probes.
40  */
41 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
42 {
43 	bool responded = server->probe.responded;
44 
45 	write_seqlock(&net->fs_lock);
46 	if (responded) {
47 		list_add_tail(&server->probe_link, &net->fs_probe_slow);
48 	} else {
49 		server->rtt = UINT_MAX;
50 		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
51 		list_add_tail(&server->probe_link, &net->fs_probe_fast);
52 	}
53 	write_sequnlock(&net->fs_lock);
54 
55 	afs_schedule_fs_probe(net, server, !responded);
56 }
57 
58 /*
59  * Handle the completion of a probe.
60  */
61 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
62 {
63 	_enter("");
64 
65 	if (atomic_dec_and_test(&server->probe_outstanding))
66 		afs_finished_fs_probe(net, server);
67 
68 	wake_up_all(&server->probe_wq);
69 }
70 
71 /*
72  * Handle inability to send a probe due to ENOMEM when trying to allocate a
73  * call struct.
74  */
75 static void afs_fs_probe_not_done(struct afs_net *net,
76 				  struct afs_server *server,
77 				  struct afs_addr_cursor *ac)
78 {
79 	struct afs_addr_list *alist = ac->alist;
80 	unsigned int index = ac->index;
81 
82 	_enter("");
83 
84 	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
85 	spin_lock(&server->probe_lock);
86 
87 	server->probe.local_failure = true;
88 	if (server->probe.error == 0)
89 		server->probe.error = -ENOMEM;
90 
91 	set_bit(index, &alist->failed);
92 
93 	spin_unlock(&server->probe_lock);
94 	return afs_done_one_fs_probe(net, server);
95 }
96 
97 /*
98  * Process the result of probing a fileserver.  This is called after successful
99  * or failed delivery of an FS.GetCapabilities operation.
100  */
101 void afs_fileserver_probe_result(struct afs_call *call)
102 {
103 	struct afs_addr_list *alist = call->alist;
104 	struct afs_server *server = call->server;
105 	unsigned int index = call->addr_ix;
106 	unsigned int rtt_us = 0, cap0;
107 	int ret = call->error;
108 
109 	_enter("%pU,%u", &server->uuid, index);
110 
111 	spin_lock(&server->probe_lock);
112 
113 	switch (ret) {
114 	case 0:
115 		server->probe.error = 0;
116 		goto responded;
117 	case -ECONNABORTED:
118 		if (!server->probe.responded) {
119 			server->probe.abort_code = call->abort_code;
120 			server->probe.error = ret;
121 		}
122 		goto responded;
123 	case -ENOMEM:
124 	case -ENONET:
125 		clear_bit(index, &alist->responded);
126 		server->probe.local_failure = true;
127 		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
128 		goto out;
129 	case -ECONNRESET: /* Responded, but call expired. */
130 	case -ERFKILL:
131 	case -EADDRNOTAVAIL:
132 	case -ENETUNREACH:
133 	case -EHOSTUNREACH:
134 	case -EHOSTDOWN:
135 	case -ECONNREFUSED:
136 	case -ETIMEDOUT:
137 	case -ETIME:
138 	default:
139 		clear_bit(index, &alist->responded);
140 		set_bit(index, &alist->failed);
141 		if (!server->probe.responded &&
142 		    (server->probe.error == 0 ||
143 		     server->probe.error == -ETIMEDOUT ||
144 		     server->probe.error == -ETIME))
145 			server->probe.error = ret;
146 		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
147 		goto out;
148 	}
149 
150 responded:
151 	clear_bit(index, &alist->failed);
152 
153 	if (call->service_id == YFS_FS_SERVICE) {
154 		server->probe.is_yfs = true;
155 		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
156 		alist->addrs[index].srx_service = call->service_id;
157 	} else {
158 		server->probe.not_yfs = true;
159 		if (!server->probe.is_yfs) {
160 			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
161 			alist->addrs[index].srx_service = call->service_id;
162 		}
163 		cap0 = ntohl(call->tmp);
164 		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
165 			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
166 		else
167 			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
168 	}
169 
170 	rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
171 	if (rtt_us < server->probe.rtt) {
172 		server->probe.rtt = rtt_us;
173 		server->rtt = rtt_us;
174 		alist->preferred = index;
175 	}
176 
177 	smp_wmb(); /* Set rtt before responded. */
178 	server->probe.responded = true;
179 	set_bit(index, &alist->responded);
180 	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
181 out:
182 	spin_unlock(&server->probe_lock);
183 
184 	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
185 	       &server->uuid, index, &alist->addrs[index].transport,
186 	       rtt_us, ret);
187 
188 	return afs_done_one_fs_probe(call->net, server);
189 }
190 
191 /*
192  * Probe one or all of a fileserver's addresses to find out the best route and
193  * to query its capabilities.
194  */
195 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
196 			     struct key *key, bool all)
197 {
198 	struct afs_addr_cursor ac = {
199 		.index = 0,
200 	};
201 
202 	_enter("%pU", &server->uuid);
203 
204 	read_lock(&server->fs_lock);
205 	ac.alist = rcu_dereference_protected(server->addresses,
206 					     lockdep_is_held(&server->fs_lock));
207 	afs_get_addrlist(ac.alist);
208 	read_unlock(&server->fs_lock);
209 
210 	server->probed_at = jiffies;
211 	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
212 	memset(&server->probe, 0, sizeof(server->probe));
213 	server->probe.rtt = UINT_MAX;
214 
215 	ac.index = ac.alist->preferred;
216 	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
217 		all = true;
218 
219 	if (all) {
220 		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
221 			if (!afs_fs_get_capabilities(net, server, &ac, key))
222 				afs_fs_probe_not_done(net, server, &ac);
223 	} else {
224 		if (!afs_fs_get_capabilities(net, server, &ac, key))
225 			afs_fs_probe_not_done(net, server, &ac);
226 	}
227 
228 	afs_put_addrlist(ac.alist);
229 }
230 
231 /*
232  * Wait for the first as-yet untried fileserver to respond.
233  */
234 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
235 {
236 	struct wait_queue_entry *waits;
237 	struct afs_server *server;
238 	unsigned int rtt = UINT_MAX, rtt_s;
239 	bool have_responders = false;
240 	int pref = -1, i;
241 
242 	_enter("%u,%lx", slist->nr_servers, untried);
243 
244 	/* Only wait for servers that have a probe outstanding. */
245 	for (i = 0; i < slist->nr_servers; i++) {
246 		if (test_bit(i, &untried)) {
247 			server = slist->servers[i].server;
248 			if (!atomic_read(&server->probe_outstanding))
249 				__clear_bit(i, &untried);
250 			if (server->probe.responded)
251 				have_responders = true;
252 		}
253 	}
254 	if (have_responders || !untried)
255 		return 0;
256 
257 	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
258 	if (!waits)
259 		return -ENOMEM;
260 
261 	for (i = 0; i < slist->nr_servers; i++) {
262 		if (test_bit(i, &untried)) {
263 			server = slist->servers[i].server;
264 			init_waitqueue_entry(&waits[i], current);
265 			add_wait_queue(&server->probe_wq, &waits[i]);
266 		}
267 	}
268 
269 	for (;;) {
270 		bool still_probing = false;
271 
272 		set_current_state(TASK_INTERRUPTIBLE);
273 		for (i = 0; i < slist->nr_servers; i++) {
274 			if (test_bit(i, &untried)) {
275 				server = slist->servers[i].server;
276 				if (server->probe.responded)
277 					goto stop;
278 				if (atomic_read(&server->probe_outstanding))
279 					still_probing = true;
280 			}
281 		}
282 
283 		if (!still_probing || signal_pending(current))
284 			goto stop;
285 		schedule();
286 	}
287 
288 stop:
289 	set_current_state(TASK_RUNNING);
290 
291 	for (i = 0; i < slist->nr_servers; i++) {
292 		if (test_bit(i, &untried)) {
293 			server = slist->servers[i].server;
294 			rtt_s = READ_ONCE(server->rtt);
295 			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
296 			    rtt_s < rtt) {
297 				pref = i;
298 				rtt = rtt_s;
299 			}
300 
301 			remove_wait_queue(&server->probe_wq, &waits[i]);
302 		}
303 	}
304 
305 	kfree(waits);
306 
307 	if (pref == -1 && signal_pending(current))
308 		return -ERESTARTSYS;
309 
310 	if (pref >= 0)
311 		slist->preferred = pref;
312 	return 0;
313 }
314 
315 /*
316  * Probe timer.  We have an increment on fs_outstanding that we need to pass
317  * along to the work item.
318  */
319 void afs_fs_probe_timer(struct timer_list *timer)
320 {
321 	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
322 
323 	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
324 		afs_dec_servers_outstanding(net);
325 }
326 
327 /*
328  * Dispatch a probe to a server.
329  */
330 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
331 	__releases(&net->fs_lock)
332 {
333 	struct key *key = NULL;
334 
335 	/* We remove it from the queues here - it will be added back to
336 	 * one of the queues on the completion of the probe.
337 	 */
338 	list_del_init(&server->probe_link);
339 
340 	afs_get_server(server, afs_server_trace_get_probe);
341 	write_sequnlock(&net->fs_lock);
342 
343 	afs_fs_probe_fileserver(net, server, key, all);
344 	afs_put_server(net, server, afs_server_trace_put_probe);
345 }
346 
347 /*
348  * Probe a server immediately without waiting for its due time to come
349  * round.  This is used when all of the addresses have been tried.
350  */
351 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
352 {
353 	write_seqlock(&net->fs_lock);
354 	if (!list_empty(&server->probe_link))
355 		return afs_dispatch_fs_probe(net, server, true);
356 	write_sequnlock(&net->fs_lock);
357 }
358 
359 /*
360  * Probe dispatcher to regularly dispatch probes to keep NAT alive.
361  */
362 void afs_fs_probe_dispatcher(struct work_struct *work)
363 {
364 	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
365 	struct afs_server *fast, *slow, *server;
366 	unsigned long nowj, timer_at, poll_at;
367 	bool first_pass = true, set_timer = false;
368 
369 	if (!net->live) {
370 		afs_dec_servers_outstanding(net);
371 		return;
372 	}
373 
374 	_enter("");
375 
376 	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
377 		afs_dec_servers_outstanding(net);
378 		_leave(" [none]");
379 		return;
380 	}
381 
382 again:
383 	write_seqlock(&net->fs_lock);
384 
385 	fast = slow = server = NULL;
386 	nowj = jiffies;
387 	timer_at = nowj + MAX_JIFFY_OFFSET;
388 
389 	if (!list_empty(&net->fs_probe_fast)) {
390 		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
391 		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
392 		if (time_before(nowj, poll_at)) {
393 			timer_at = poll_at;
394 			set_timer = true;
395 			fast = NULL;
396 		}
397 	}
398 
399 	if (!list_empty(&net->fs_probe_slow)) {
400 		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
401 		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
402 		if (time_before(nowj, poll_at)) {
403 			if (time_before(poll_at, timer_at))
404 			    timer_at = poll_at;
405 			set_timer = true;
406 			slow = NULL;
407 		}
408 	}
409 
410 	server = fast ?: slow;
411 	if (server)
412 		_debug("probe %pU", &server->uuid);
413 
414 	if (server && (first_pass || !need_resched())) {
415 		afs_dispatch_fs_probe(net, server, server == fast);
416 		first_pass = false;
417 		goto again;
418 	}
419 
420 	write_sequnlock(&net->fs_lock);
421 
422 	if (server) {
423 		if (!queue_work(afs_wq, &net->fs_prober))
424 			afs_dec_servers_outstanding(net);
425 		_leave(" [requeue]");
426 	} else if (set_timer) {
427 		if (timer_reduce(&net->fs_probe_timer, timer_at))
428 			afs_dec_servers_outstanding(net);
429 		_leave(" [timer]");
430 	} else {
431 		afs_dec_servers_outstanding(net);
432 		_leave(" [quiesce]");
433 	}
434 }
435 
436 /*
437  * Wait for a probe on a particular fileserver to complete for 2s.
438  */
439 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
440 {
441 	struct wait_queue_entry wait;
442 	unsigned long timo = 2 * HZ;
443 
444 	if (atomic_read(&server->probe_outstanding) == 0)
445 		goto dont_wait;
446 
447 	init_wait_entry(&wait, 0);
448 	for (;;) {
449 		prepare_to_wait_event(&server->probe_wq, &wait,
450 				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
451 		if (timo == 0 ||
452 		    server->probe.responded ||
453 		    atomic_read(&server->probe_outstanding) == 0 ||
454 		    (is_intr && signal_pending(current)))
455 			break;
456 		timo = schedule_timeout(timo);
457 	}
458 
459 	finish_wait(&server->probe_wq, &wait);
460 
461 dont_wait:
462 	if (server->probe.responded)
463 		return 0;
464 	if (is_intr && signal_pending(current))
465 		return -ERESTARTSYS;
466 	if (timo == 0)
467 		return -ETIME;
468 	return -EDESTADDRREQ;
469 }
470 
471 /*
472  * Clean up the probing when the namespace is killed off.
473  */
474 void afs_fs_probe_cleanup(struct afs_net *net)
475 {
476 	if (del_timer_sync(&net->fs_probe_timer))
477 		afs_dec_servers_outstanding(net);
478 }
479