1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS fileserver probing 3 * 4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/sched.h> 9 #include <linux/slab.h> 10 #include "afs_fs.h" 11 #include "internal.h" 12 #include "protocol_yfs.h" 13 14 static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; 15 static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; 16 17 /* 18 * Start the probe polling timer. We have to supply it with an inc on the 19 * outstanding server count. 20 */ 21 static void afs_schedule_fs_probe(struct afs_net *net, 22 struct afs_server *server, bool fast) 23 { 24 unsigned long atj; 25 26 if (!net->live) 27 return; 28 29 atj = server->probed_at; 30 atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; 31 32 afs_inc_servers_outstanding(net); 33 if (timer_reduce(&net->fs_probe_timer, atj)) 34 afs_dec_servers_outstanding(net); 35 } 36 37 /* 38 * Handle the completion of a set of probes. 39 */ 40 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) 41 { 42 bool responded = server->probe.responded; 43 44 write_seqlock(&net->fs_lock); 45 if (responded) { 46 list_add_tail(&server->probe_link, &net->fs_probe_slow); 47 } else { 48 server->rtt = UINT_MAX; 49 clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); 50 list_add_tail(&server->probe_link, &net->fs_probe_fast); 51 } 52 write_sequnlock(&net->fs_lock); 53 54 afs_schedule_fs_probe(net, server, !responded); 55 } 56 57 /* 58 * Handle the completion of a probe. 59 */ 60 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) 61 { 62 _enter(""); 63 64 if (atomic_dec_and_test(&server->probe_outstanding)) 65 afs_finished_fs_probe(net, server); 66 67 wake_up_all(&server->probe_wq); 68 } 69 70 /* 71 * Handle inability to send a probe due to ENOMEM when trying to allocate a 72 * call struct. 
*/
static void afs_fs_probe_not_done(struct afs_net *net,
				  struct afs_server *server,
				  struct afs_addr_cursor *ac)
{
	struct afs_addr_list *addrs = ac->alist;
	unsigned int addr_ix = ac->index;

	_enter("");

	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);

	spin_lock(&server->probe_lock);
	server->probe.local_failure = true;
	/* Don't overwrite an error that has already been recorded. */
	if (!server->probe.error)
		server->probe.error = -ENOMEM;
	set_bit(addr_ix, &addrs->failed);
	spin_unlock(&server->probe_lock);

	/* No call went out, so the completion has to be accounted for here. */
	afs_done_one_fs_probe(net, server);
}

/*
 * Digest the outcome of a fileserver probe.  This gets called once an
 * FS.GetCapabilities operation has either been delivered or has failed.
 *
 * A result of 0 or -ECONNABORTED counts as the server having responded on
 * the probed address: the address is marked as responding, the service type
 * is noted and the RTT is sampled.  -ENOMEM and -ENONET are recorded as local
 * failures without marking the address as failed.  Anything else marks the
 * address as failed and may be recorded as the probe error.
 *
 * All the probe state is updated under server->probe_lock; the completion is
 * accounted for after the lock has been dropped.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *alist = call->alist;
	struct afs_server *server = call->server;
	unsigned int index = call->addr_ix;
	unsigned int rtt_us = 0;
	bool got_reply = false;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		server->probe.error = 0;
		got_reply = true;
		break;

	case -ECONNABORTED:
		/* An abort is still a reply; only note it if nothing better
		 * has been seen yet.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = ret;
		}
		got_reply = true;
		break;

	case -ENOMEM:
	case -ENONET:
		clear_bit(index, &alist->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		break;

	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		clear_bit(index, &alist->responded);
		set_bit(index, &alist->failed);
		/* Only replace no error or a timeout-type error, and only
		 * whilst no address has responded.
		 */
		if (!server->probe.responded &&
		    (server->probe.error == 0 ||
		     server->probe.error == -ETIMEDOUT ||
		     server->probe.error == -ETIME))
			server->probe.error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		break;
	}

	if (got_reply) {
		clear_bit(index, &alist->failed);

		if (call->service_id == YFS_FS_SERVICE) {
			server->probe.is_yfs = true;
			set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			alist->addrs[index].srx_service = call->service_id;
		} else {
			server->probe.not_yfs = true;
			/* Don't downgrade if this probe set already saw YFS. */
			if (!server->probe.is_yfs) {
				clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
				alist->addrs[index].srx_service = call->service_id;
			}
		}

		/* The address with the lowest RTT seen becomes preferred. */
		rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
		if (rtt_us < server->probe.rtt) {
			server->probe.rtt = rtt_us;
			server->rtt = rtt_us;
			alist->preferred = index;
		}

		smp_wmb(); /* Set rtt before responded. */
		server->probe.responded = true;
		set_bit(index, &alist->responded);
		set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
	}

	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, index, &alist->addrs[index].transport,
	       rtt_us, ret);

	afs_done_one_fs_probe(call->net, server);
}

/*
 * Probe one or all of a fileserver's addresses to find out the best route and
 * to query its capabilities.
188 */ 189 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, 190 struct key *key, bool all) 191 { 192 struct afs_addr_cursor ac = { 193 .index = 0, 194 }; 195 196 _enter("%pU", &server->uuid); 197 198 read_lock(&server->fs_lock); 199 ac.alist = rcu_dereference_protected(server->addresses, 200 lockdep_is_held(&server->fs_lock)); 201 afs_get_addrlist(ac.alist); 202 read_unlock(&server->fs_lock); 203 204 server->probed_at = jiffies; 205 atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); 206 memset(&server->probe, 0, sizeof(server->probe)); 207 server->probe.rtt = UINT_MAX; 208 209 ac.index = ac.alist->preferred; 210 if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) 211 all = true; 212 213 if (all) { 214 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) 215 if (!afs_fs_get_capabilities(net, server, &ac, key)) 216 afs_fs_probe_not_done(net, server, &ac); 217 } else { 218 if (!afs_fs_get_capabilities(net, server, &ac, key)) 219 afs_fs_probe_not_done(net, server, &ac); 220 } 221 222 afs_put_addrlist(ac.alist); 223 } 224 225 /* 226 * Wait for the first as-yet untried fileserver to respond. 227 */ 228 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) 229 { 230 struct wait_queue_entry *waits; 231 struct afs_server *server; 232 unsigned int rtt = UINT_MAX, rtt_s; 233 bool have_responders = false; 234 int pref = -1, i; 235 236 _enter("%u,%lx", slist->nr_servers, untried); 237 238 /* Only wait for servers that have a probe outstanding. 
*/ 239 for (i = 0; i < slist->nr_servers; i++) { 240 if (test_bit(i, &untried)) { 241 server = slist->servers[i].server; 242 if (!atomic_read(&server->probe_outstanding)) 243 __clear_bit(i, &untried); 244 if (server->probe.responded) 245 have_responders = true; 246 } 247 } 248 if (have_responders || !untried) 249 return 0; 250 251 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); 252 if (!waits) 253 return -ENOMEM; 254 255 for (i = 0; i < slist->nr_servers; i++) { 256 if (test_bit(i, &untried)) { 257 server = slist->servers[i].server; 258 init_waitqueue_entry(&waits[i], current); 259 add_wait_queue(&server->probe_wq, &waits[i]); 260 } 261 } 262 263 for (;;) { 264 bool still_probing = false; 265 266 set_current_state(TASK_INTERRUPTIBLE); 267 for (i = 0; i < slist->nr_servers; i++) { 268 if (test_bit(i, &untried)) { 269 server = slist->servers[i].server; 270 if (server->probe.responded) 271 goto stop; 272 if (atomic_read(&server->probe_outstanding)) 273 still_probing = true; 274 } 275 } 276 277 if (!still_probing || signal_pending(current)) 278 goto stop; 279 schedule(); 280 } 281 282 stop: 283 set_current_state(TASK_RUNNING); 284 285 for (i = 0; i < slist->nr_servers; i++) { 286 if (test_bit(i, &untried)) { 287 server = slist->servers[i].server; 288 rtt_s = READ_ONCE(server->rtt); 289 if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && 290 rtt_s < rtt) { 291 pref = i; 292 rtt = rtt_s; 293 } 294 295 remove_wait_queue(&server->probe_wq, &waits[i]); 296 } 297 } 298 299 kfree(waits); 300 301 if (pref == -1 && signal_pending(current)) 302 return -ERESTARTSYS; 303 304 if (pref >= 0) 305 slist->preferred = pref; 306 return 0; 307 } 308 309 /* 310 * Probe timer. We have an increment on fs_outstanding that we need to pass 311 * along to the work item. 
312 */ 313 void afs_fs_probe_timer(struct timer_list *timer) 314 { 315 struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); 316 317 if (!net->live || !queue_work(afs_wq, &net->fs_prober)) 318 afs_dec_servers_outstanding(net); 319 } 320 321 /* 322 * Dispatch a probe to a server. 323 */ 324 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) 325 __releases(&net->fs_lock) 326 { 327 struct key *key = NULL; 328 329 /* We remove it from the queues here - it will be added back to 330 * one of the queues on the completion of the probe. 331 */ 332 list_del_init(&server->probe_link); 333 334 afs_get_server(server, afs_server_trace_get_probe); 335 write_sequnlock(&net->fs_lock); 336 337 afs_fs_probe_fileserver(net, server, key, all); 338 afs_put_server(net, server, afs_server_trace_put_probe); 339 } 340 341 /* 342 * Probe a server immediately without waiting for its due time to come 343 * round. This is used when all of the addresses have been tried. 344 */ 345 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) 346 { 347 write_seqlock(&net->fs_lock); 348 if (!list_empty(&server->probe_link)) 349 return afs_dispatch_fs_probe(net, server, true); 350 write_sequnlock(&net->fs_lock); 351 } 352 353 /* 354 * Probe dispatcher to regularly dispatch probes to keep NAT alive. 
355 */ 356 void afs_fs_probe_dispatcher(struct work_struct *work) 357 { 358 struct afs_net *net = container_of(work, struct afs_net, fs_prober); 359 struct afs_server *fast, *slow, *server; 360 unsigned long nowj, timer_at, poll_at; 361 bool first_pass = true, set_timer = false; 362 363 if (!net->live) 364 return; 365 366 _enter(""); 367 368 if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { 369 _leave(" [none]"); 370 return; 371 } 372 373 again: 374 write_seqlock(&net->fs_lock); 375 376 fast = slow = server = NULL; 377 nowj = jiffies; 378 timer_at = nowj + MAX_JIFFY_OFFSET; 379 380 if (!list_empty(&net->fs_probe_fast)) { 381 fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); 382 poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; 383 if (time_before(nowj, poll_at)) { 384 timer_at = poll_at; 385 set_timer = true; 386 fast = NULL; 387 } 388 } 389 390 if (!list_empty(&net->fs_probe_slow)) { 391 slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); 392 poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; 393 if (time_before(nowj, poll_at)) { 394 if (time_before(poll_at, timer_at)) 395 timer_at = poll_at; 396 set_timer = true; 397 slow = NULL; 398 } 399 } 400 401 server = fast ?: slow; 402 if (server) 403 _debug("probe %pU", &server->uuid); 404 405 if (server && (first_pass || !need_resched())) { 406 afs_dispatch_fs_probe(net, server, server == fast); 407 first_pass = false; 408 goto again; 409 } 410 411 write_sequnlock(&net->fs_lock); 412 413 if (server) { 414 if (!queue_work(afs_wq, &net->fs_prober)) 415 afs_dec_servers_outstanding(net); 416 _leave(" [requeue]"); 417 } else if (set_timer) { 418 if (timer_reduce(&net->fs_probe_timer, timer_at)) 419 afs_dec_servers_outstanding(net); 420 _leave(" [timer]"); 421 } else { 422 afs_dec_servers_outstanding(net); 423 _leave(" [quiesce]"); 424 } 425 } 426 427 /* 428 * Wait for a probe on a particular fileserver to complete for 2s. 
429 */ 430 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) 431 { 432 struct wait_queue_entry wait; 433 unsigned long timo = 2 * HZ; 434 435 if (atomic_read(&server->probe_outstanding) == 0) 436 goto dont_wait; 437 438 init_wait_entry(&wait, 0); 439 for (;;) { 440 prepare_to_wait_event(&server->probe_wq, &wait, 441 is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 442 if (timo == 0 || 443 server->probe.responded || 444 atomic_read(&server->probe_outstanding) == 0 || 445 (is_intr && signal_pending(current))) 446 break; 447 timo = schedule_timeout(timo); 448 } 449 450 finish_wait(&server->probe_wq, &wait); 451 452 dont_wait: 453 if (server->probe.responded) 454 return 0; 455 if (is_intr && signal_pending(current)) 456 return -ERESTARTSYS; 457 if (timo == 0) 458 return -ETIME; 459 return -EDESTADDRREQ; 460 } 461 462 /* 463 * Clean up the probing when the namespace is killed off. 464 */ 465 void afs_fs_probe_cleanup(struct afs_net *net) 466 { 467 if (del_timer_sync(&net->fs_probe_timer)) 468 afs_dec_servers_outstanding(net); 469 } 470