1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS fileserver probing 3 * 4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/sched.h> 9 #include <linux/slab.h> 10 #include "afs_fs.h" 11 #include "internal.h" 12 #include "protocol_afs.h" 13 #include "protocol_yfs.h" 14 15 static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; 16 static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; 17 18 /* 19 * Start the probe polling timer. We have to supply it with an inc on the 20 * outstanding server count. 21 */ 22 static void afs_schedule_fs_probe(struct afs_net *net, 23 struct afs_server *server, bool fast) 24 { 25 unsigned long atj; 26 27 if (!net->live) 28 return; 29 30 atj = server->probed_at; 31 atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; 32 33 afs_inc_servers_outstanding(net); 34 if (timer_reduce(&net->fs_probe_timer, atj)) 35 afs_dec_servers_outstanding(net); 36 } 37 38 /* 39 * Handle the completion of a set of probes. 40 */ 41 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) 42 { 43 bool responded = server->probe.responded; 44 45 write_seqlock(&net->fs_lock); 46 if (responded) { 47 list_add_tail(&server->probe_link, &net->fs_probe_slow); 48 } else { 49 server->rtt = UINT_MAX; 50 clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); 51 list_add_tail(&server->probe_link, &net->fs_probe_fast); 52 } 53 write_sequnlock(&net->fs_lock); 54 55 afs_schedule_fs_probe(net, server, !responded); 56 } 57 58 /* 59 * Handle the completion of a probe. 60 */ 61 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) 62 { 63 _enter(""); 64 65 if (atomic_dec_and_test(&server->probe_outstanding)) 66 afs_finished_fs_probe(net, server); 67 68 wake_up_all(&server->probe_wq); 69 } 70 71 /* 72 * Handle inability to send a probe due to ENOMEM when trying to allocate a 73 * call struct. 74 */ 75 static void afs_fs_probe_not_done(struct afs_net *net, 76 struct afs_server *server, 77 struct afs_addr_cursor *ac) 78 { 79 struct afs_addr_list *alist = ac->alist; 80 unsigned int index = ac->index; 81 82 _enter(""); 83 84 trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); 85 spin_lock(&server->probe_lock); 86 87 server->probe.local_failure = true; 88 if (server->probe.error == 0) 89 server->probe.error = -ENOMEM; 90 91 set_bit(index, &alist->failed); 92 93 spin_unlock(&server->probe_lock); 94 return afs_done_one_fs_probe(net, server); 95 } 96 97 /* 98 * Process the result of probing a fileserver. This is called after successful 99 * or failed delivery of an FS.GetCapabilities operation. 100 */ 101 void afs_fileserver_probe_result(struct afs_call *call) 102 { 103 struct afs_addr_list *alist = call->alist; 104 struct afs_server *server = call->server; 105 unsigned int index = call->addr_ix; 106 unsigned int rtt_us = 0, cap0; 107 int ret = call->error; 108 109 _enter("%pU,%u", &server->uuid, index); 110 111 spin_lock(&server->probe_lock); 112 113 switch (ret) { 114 case 0: 115 server->probe.error = 0; 116 goto responded; 117 case -ECONNABORTED: 118 if (!server->probe.responded) { 119 server->probe.abort_code = call->abort_code; 120 server->probe.error = ret; 121 } 122 goto responded; 123 case -ENOMEM: 124 case -ENONET: 125 clear_bit(index, &alist->responded); 126 server->probe.local_failure = true; 127 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); 128 goto out; 129 case -ECONNRESET: /* Responded, but call expired. */ 130 case -ERFKILL: 131 case -EADDRNOTAVAIL: 132 case -ENETUNREACH: 133 case -EHOSTUNREACH: 134 case -EHOSTDOWN: 135 case -ECONNREFUSED: 136 case -ETIMEDOUT: 137 case -ETIME: 138 default: 139 clear_bit(index, &alist->responded); 140 set_bit(index, &alist->failed); 141 if (!server->probe.responded && 142 (server->probe.error == 0 || 143 server->probe.error == -ETIMEDOUT || 144 server->probe.error == -ETIME)) 145 server->probe.error = ret; 146 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); 147 goto out; 148 } 149 150 responded: 151 clear_bit(index, &alist->failed); 152 153 if (call->service_id == YFS_FS_SERVICE) { 154 server->probe.is_yfs = true; 155 set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); 156 alist->addrs[index].srx_service = call->service_id; 157 } else { 158 server->probe.not_yfs = true; 159 if (!server->probe.is_yfs) { 160 clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); 161 alist->addrs[index].srx_service = call->service_id; 162 } 163 cap0 = ntohl(call->tmp); 164 if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES) 165 set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); 166 else 167 clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); 168 } 169 170 if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && 171 rtt_us < server->probe.rtt) { 172 server->probe.rtt = rtt_us; 173 server->rtt = rtt_us; 174 alist->preferred = index; 175 } 176 177 smp_wmb(); /* Set rtt before responded. */ 178 server->probe.responded = true; 179 set_bit(index, &alist->responded); 180 set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); 181 out: 182 spin_unlock(&server->probe_lock); 183 184 _debug("probe %pU [%u] %pISpc rtt=%u ret=%d", 185 &server->uuid, index, &alist->addrs[index].transport, 186 rtt_us, ret); 187 188 return afs_done_one_fs_probe(call->net, server); 189 } 190 191 /* 192 * Probe one or all of a fileserver's addresses to find out the best route and 193 * to query its capabilities. 194 */ 195 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, 196 struct key *key, bool all) 197 { 198 struct afs_addr_cursor ac = { 199 .index = 0, 200 }; 201 202 _enter("%pU", &server->uuid); 203 204 read_lock(&server->fs_lock); 205 ac.alist = rcu_dereference_protected(server->addresses, 206 lockdep_is_held(&server->fs_lock)); 207 afs_get_addrlist(ac.alist); 208 read_unlock(&server->fs_lock); 209 210 server->probed_at = jiffies; 211 atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); 212 memset(&server->probe, 0, sizeof(server->probe)); 213 server->probe.rtt = UINT_MAX; 214 215 ac.index = ac.alist->preferred; 216 if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) 217 all = true; 218 219 if (all) { 220 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) 221 if (!afs_fs_get_capabilities(net, server, &ac, key)) 222 afs_fs_probe_not_done(net, server, &ac); 223 } else { 224 if (!afs_fs_get_capabilities(net, server, &ac, key)) 225 afs_fs_probe_not_done(net, server, &ac); 226 } 227 228 afs_put_addrlist(ac.alist); 229 } 230 231 /* 232 * Wait for the first as-yet untried fileserver to respond. 233 */ 234 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) 235 { 236 struct wait_queue_entry *waits; 237 struct afs_server *server; 238 unsigned int rtt = UINT_MAX, rtt_s; 239 bool have_responders = false; 240 int pref = -1, i; 241 242 _enter("%u,%lx", slist->nr_servers, untried); 243 244 /* Only wait for servers that have a probe outstanding. */ 245 for (i = 0; i < slist->nr_servers; i++) { 246 if (test_bit(i, &untried)) { 247 server = slist->servers[i].server; 248 if (!atomic_read(&server->probe_outstanding)) 249 __clear_bit(i, &untried); 250 if (server->probe.responded) 251 have_responders = true; 252 } 253 } 254 if (have_responders || !untried) 255 return 0; 256 257 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); 258 if (!waits) 259 return -ENOMEM; 260 261 for (i = 0; i < slist->nr_servers; i++) { 262 if (test_bit(i, &untried)) { 263 server = slist->servers[i].server; 264 init_waitqueue_entry(&waits[i], current); 265 add_wait_queue(&server->probe_wq, &waits[i]); 266 } 267 } 268 269 for (;;) { 270 bool still_probing = false; 271 272 set_current_state(TASK_INTERRUPTIBLE); 273 for (i = 0; i < slist->nr_servers; i++) { 274 if (test_bit(i, &untried)) { 275 server = slist->servers[i].server; 276 if (server->probe.responded) 277 goto stop; 278 if (atomic_read(&server->probe_outstanding)) 279 still_probing = true; 280 } 281 } 282 283 if (!still_probing || signal_pending(current)) 284 goto stop; 285 schedule(); 286 } 287 288 stop: 289 set_current_state(TASK_RUNNING); 290 291 for (i = 0; i < slist->nr_servers; i++) { 292 if (test_bit(i, &untried)) { 293 server = slist->servers[i].server; 294 rtt_s = READ_ONCE(server->rtt); 295 if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && 296 rtt_s < rtt) { 297 pref = i; 298 rtt = rtt_s; 299 } 300 301 remove_wait_queue(&server->probe_wq, &waits[i]); 302 } 303 } 304 305 kfree(waits); 306 307 if (pref == -1 && signal_pending(current)) 308 return -ERESTARTSYS; 309 310 if (pref >= 0) 311 slist->preferred = pref; 312 return 0; 313 } 314 315 /* 316 * Probe timer. We have an increment on fs_outstanding that we need to pass 317 * along to the work item. 318 */ 319 void afs_fs_probe_timer(struct timer_list *timer) 320 { 321 struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); 322 323 if (!net->live || !queue_work(afs_wq, &net->fs_prober)) 324 afs_dec_servers_outstanding(net); 325 } 326 327 /* 328 * Dispatch a probe to a server. 329 */ 330 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) 331 __releases(&net->fs_lock) 332 { 333 struct key *key = NULL; 334 335 /* We remove it from the queues here - it will be added back to 336 * one of the queues on the completion of the probe. 337 */ 338 list_del_init(&server->probe_link); 339 340 afs_get_server(server, afs_server_trace_get_probe); 341 write_sequnlock(&net->fs_lock); 342 343 afs_fs_probe_fileserver(net, server, key, all); 344 afs_put_server(net, server, afs_server_trace_put_probe); 345 } 346 347 /* 348 * Probe a server immediately without waiting for its due time to come 349 * round. This is used when all of the addresses have been tried. 350 */ 351 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) 352 { 353 write_seqlock(&net->fs_lock); 354 if (!list_empty(&server->probe_link)) 355 return afs_dispatch_fs_probe(net, server, true); 356 write_sequnlock(&net->fs_lock); 357 } 358 359 /* 360 * Probe dispatcher to regularly dispatch probes to keep NAT alive. 361 */ 362 void afs_fs_probe_dispatcher(struct work_struct *work) 363 { 364 struct afs_net *net = container_of(work, struct afs_net, fs_prober); 365 struct afs_server *fast, *slow, *server; 366 unsigned long nowj, timer_at, poll_at; 367 bool first_pass = true, set_timer = false; 368 369 if (!net->live) 370 return; 371 372 _enter(""); 373 374 if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { 375 _leave(" [none]"); 376 return; 377 } 378 379 again: 380 write_seqlock(&net->fs_lock); 381 382 fast = slow = server = NULL; 383 nowj = jiffies; 384 timer_at = nowj + MAX_JIFFY_OFFSET; 385 386 if (!list_empty(&net->fs_probe_fast)) { 387 fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); 388 poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; 389 if (time_before(nowj, poll_at)) { 390 timer_at = poll_at; 391 set_timer = true; 392 fast = NULL; 393 } 394 } 395 396 if (!list_empty(&net->fs_probe_slow)) { 397 slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); 398 poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; 399 if (time_before(nowj, poll_at)) { 400 if (time_before(poll_at, timer_at)) 401 timer_at = poll_at; 402 set_timer = true; 403 slow = NULL; 404 } 405 } 406 407 server = fast ?: slow; 408 if (server) 409 _debug("probe %pU", &server->uuid); 410 411 if (server && (first_pass || !need_resched())) { 412 afs_dispatch_fs_probe(net, server, server == fast); 413 first_pass = false; 414 goto again; 415 } 416 417 write_sequnlock(&net->fs_lock); 418 419 if (server) { 420 if (!queue_work(afs_wq, &net->fs_prober)) 421 afs_dec_servers_outstanding(net); 422 _leave(" [requeue]"); 423 } else if (set_timer) { 424 if (timer_reduce(&net->fs_probe_timer, timer_at)) 425 afs_dec_servers_outstanding(net); 426 _leave(" [timer]"); 427 } else { 428 afs_dec_servers_outstanding(net); 429 _leave(" [quiesce]"); 430 } 431 } 432 433 /* 434 * Wait for a probe on a particular fileserver to complete for 2s. 435 */ 436 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) 437 { 438 struct wait_queue_entry wait; 439 unsigned long timo = 2 * HZ; 440 441 if (atomic_read(&server->probe_outstanding) == 0) 442 goto dont_wait; 443 444 init_wait_entry(&wait, 0); 445 for (;;) { 446 prepare_to_wait_event(&server->probe_wq, &wait, 447 is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 448 if (timo == 0 || 449 server->probe.responded || 450 atomic_read(&server->probe_outstanding) == 0 || 451 (is_intr && signal_pending(current))) 452 break; 453 timo = schedule_timeout(timo); 454 } 455 456 finish_wait(&server->probe_wq, &wait); 457 458 dont_wait: 459 if (server->probe.responded) 460 return 0; 461 if (is_intr && signal_pending(current)) 462 return -ERESTARTSYS; 463 if (timo == 0) 464 return -ETIME; 465 return -EDESTADDRREQ; 466 } 467 468 /* 469 * Clean up the probing when the namespace is killed off. 470 */ 471 void afs_fs_probe_cleanup(struct afs_net *net) 472 { 473 if (del_timer_sync(&net->fs_probe_timer)) 474 afs_dec_servers_outstanding(net); 475 } 476