1 /* Handle fileserver selection and rotation. 2 * 3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public Licence 8 * as published by the Free Software Foundation; either version 9 * 2 of the Licence, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/slab.h> 14 #include <linux/fs.h> 15 #include <linux/sched.h> 16 #include <linux/delay.h> 17 #include <linux/sched/signal.h> 18 #include "internal.h" 19 #include "afs_fs.h" 20 21 /* 22 * Initialise a filesystem server cursor for iterating over FS servers. 23 */ 24 static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) 25 { 26 memset(fc, 0, sizeof(*fc)); 27 } 28 29 /* 30 * Begin an operation on the fileserver. 31 * 32 * Fileserver operations are serialised on the server by vnode, so we serialise 33 * them here also using the io_lock. 34 */ 35 bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, 36 struct key *key) 37 { 38 afs_init_fs_cursor(fc, vnode); 39 fc->vnode = vnode; 40 fc->key = key; 41 fc->ac.error = SHRT_MAX; 42 43 if (mutex_lock_interruptible(&vnode->io_lock) < 0) { 44 fc->ac.error = -EINTR; 45 fc->flags |= AFS_FS_CURSOR_STOP; 46 return false; 47 } 48 49 if (vnode->lock_state != AFS_VNODE_LOCK_NONE) 50 fc->flags |= AFS_FS_CURSOR_CUR_ONLY; 51 return true; 52 } 53 54 /* 55 * Begin iteration through a server list, starting with the vnode's last used 56 * server if possible, or the last recorded good server if not. 57 */ 58 static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, 59 struct afs_vnode *vnode) 60 { 61 struct afs_cb_interest *cbi; 62 int i; 63 64 read_lock(&vnode->volume->servers_lock); 65 fc->server_list = afs_get_serverlist(vnode->volume->servers); 66 read_unlock(&vnode->volume->servers_lock); 67 68 cbi = vnode->cb_interest; 69 if (cbi) { 70 /* See if the vnode's preferred record is still available */ 71 for (i = 0; i < fc->server_list->nr_servers; i++) { 72 if (fc->server_list->servers[i].cb_interest == cbi) { 73 fc->start = i; 74 goto found_interest; 75 } 76 } 77 78 /* If we have a lock outstanding on a server that's no longer 79 * serving this vnode, then we can't switch to another server 80 * and have to return an error. 81 */ 82 if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { 83 fc->ac.error = -ESTALE; 84 return false; 85 } 86 87 /* Note that the callback promise is effectively broken */ 88 write_seqlock(&vnode->cb_lock); 89 ASSERTCMP(cbi, ==, vnode->cb_interest); 90 vnode->cb_interest = NULL; 91 if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) 92 vnode->cb_break++; 93 write_sequnlock(&vnode->cb_lock); 94 95 afs_put_cb_interest(afs_v2net(vnode), cbi); 96 cbi = NULL; 97 } else { 98 fc->start = READ_ONCE(fc->server_list->index); 99 } 100 101 found_interest: 102 fc->index = fc->start; 103 return true; 104 } 105 106 /* 107 * Post volume busy note. 108 */ 109 static void afs_busy(struct afs_volume *volume, u32 abort_code) 110 { 111 const char *m; 112 113 switch (abort_code) { 114 case VOFFLINE: m = "offline"; break; 115 case VRESTARTING: m = "restarting"; break; 116 case VSALVAGING: m = "being salvaged"; break; 117 default: m = "busy"; break; 118 } 119 120 pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); 121 } 122 123 /* 124 * Sleep and retry the operation to the same fileserver. 125 */ 126 static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) 127 { 128 msleep_interruptible(1000); 129 if (signal_pending(current)) { 130 fc->ac.error = -ERESTARTSYS; 131 return false; 132 } 133 134 return true; 135 } 136 137 /* 138 * Select the fileserver to use. May be called multiple times to rotate 139 * through the fileservers. 140 */ 141 bool afs_select_fileserver(struct afs_fs_cursor *fc) 142 { 143 struct afs_addr_list *alist; 144 struct afs_server *server; 145 struct afs_vnode *vnode = fc->vnode; 146 147 _enter("%u/%u,%u/%u,%d,%d", 148 fc->index, fc->start, 149 fc->ac.index, fc->ac.start, 150 fc->ac.error, fc->ac.abort_code); 151 152 if (fc->flags & AFS_FS_CURSOR_STOP) { 153 _leave(" = f [stopped]"); 154 return false; 155 } 156 157 /* Evaluate the result of the previous operation, if there was one. */ 158 switch (fc->ac.error) { 159 case SHRT_MAX: 160 goto start; 161 162 case 0: 163 default: 164 /* Success or local failure. Stop. */ 165 fc->flags |= AFS_FS_CURSOR_STOP; 166 _leave(" = f [okay/local %d]", fc->ac.error); 167 return false; 168 169 case -ECONNABORTED: 170 /* The far side rejected the operation on some grounds. This 171 * might involve the server being busy or the volume having been moved. 172 */ 173 switch (fc->ac.abort_code) { 174 case VNOVOL: 175 /* This fileserver doesn't know about the volume. 176 * - May indicate that the VL is wrong - retry once and compare 177 * the results. 178 * - May indicate that the fileserver couldn't attach to the vol. 179 */ 180 if (fc->flags & AFS_FS_CURSOR_VNOVOL) { 181 fc->ac.error = -EREMOTEIO; 182 goto next_server; 183 } 184 185 write_lock(&vnode->volume->servers_lock); 186 fc->server_list->vnovol_mask |= 1 << fc->index; 187 write_unlock(&vnode->volume->servers_lock); 188 189 set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); 190 fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); 191 if (fc->ac.error < 0) 192 goto failed; 193 194 if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { 195 fc->ac.error = -ENOMEDIUM; 196 goto failed; 197 } 198 199 /* If the server list didn't change, then assume that 200 * it's the fileserver having trouble. 201 */ 202 if (vnode->volume->servers == fc->server_list) { 203 fc->ac.error = -EREMOTEIO; 204 goto next_server; 205 } 206 207 /* Try again */ 208 fc->flags |= AFS_FS_CURSOR_VNOVOL; 209 _leave(" = t [vnovol]"); 210 return true; 211 212 case VSALVAGE: /* TODO: Should this return an error or iterate? */ 213 case VVOLEXISTS: 214 case VNOSERVICE: 215 case VONLINE: 216 case VDISKFULL: 217 case VOVERQUOTA: 218 fc->ac.error = afs_abort_to_error(fc->ac.abort_code); 219 goto next_server; 220 221 case VOFFLINE: 222 if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { 223 afs_busy(vnode->volume, fc->ac.abort_code); 224 clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); 225 } 226 if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { 227 fc->ac.error = -EADV; 228 goto failed; 229 } 230 if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { 231 fc->ac.error = -ESTALE; 232 goto failed; 233 } 234 goto busy; 235 236 case VSALVAGING: 237 case VRESTARTING: 238 case VBUSY: 239 /* Retry after going round all the servers unless we 240 * have a file lock we need to maintain. 241 */ 242 if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { 243 fc->ac.error = -EBUSY; 244 goto failed; 245 } 246 if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { 247 afs_busy(vnode->volume, fc->ac.abort_code); 248 clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); 249 } 250 busy: 251 if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { 252 if (!afs_sleep_and_retry(fc)) 253 goto failed; 254 255 /* Retry with same server & address */ 256 _leave(" = t [vbusy]"); 257 return true; 258 } 259 260 fc->flags |= AFS_FS_CURSOR_VBUSY; 261 goto next_server; 262 263 case VMOVED: 264 /* The volume migrated to another server. We consider 265 * consider all locks and callbacks broken and request 266 * an update from the VLDB. 267 * 268 * We also limit the number of VMOVED hops we will 269 * honour, just in case someone sets up a loop. 270 */ 271 if (fc->flags & AFS_FS_CURSOR_VMOVED) { 272 fc->ac.error = -EREMOTEIO; 273 goto failed; 274 } 275 fc->flags |= AFS_FS_CURSOR_VMOVED; 276 277 set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); 278 set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); 279 fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); 280 if (fc->ac.error < 0) 281 goto failed; 282 283 /* If the server list didn't change, then the VLDB is 284 * out of sync with the fileservers. This is hopefully 285 * a temporary condition, however, so we don't want to 286 * permanently block access to the file. 287 * 288 * TODO: Try other fileservers if we can. 289 * 290 * TODO: Retry a few times with sleeps. 291 */ 292 if (vnode->volume->servers == fc->server_list) { 293 fc->ac.error = -ENOMEDIUM; 294 goto failed; 295 } 296 297 goto restart_from_beginning; 298 299 default: 300 clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); 301 clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); 302 fc->ac.error = afs_abort_to_error(fc->ac.abort_code); 303 goto failed; 304 } 305 306 case -ENETUNREACH: 307 case -EHOSTUNREACH: 308 case -ECONNREFUSED: 309 case -ETIMEDOUT: 310 case -ETIME: 311 _debug("no conn"); 312 goto iterate_address; 313 314 case -ECONNRESET: 315 _debug("call reset"); 316 goto failed; 317 } 318 319 restart_from_beginning: 320 _debug("restart"); 321 afs_end_cursor(&fc->ac); 322 afs_put_cb_interest(afs_v2net(vnode), fc->cbi); 323 fc->cbi = NULL; 324 afs_put_serverlist(afs_v2net(vnode), fc->server_list); 325 fc->server_list = NULL; 326 start: 327 _debug("start"); 328 /* See if we need to do an update of the volume record. Note that the 329 * volume may have moved or even have been deleted. 330 */ 331 fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); 332 if (fc->ac.error < 0) 333 goto failed; 334 335 if (!afs_start_fs_iteration(fc, vnode)) 336 goto failed; 337 338 use_server: 339 _debug("use"); 340 /* We're starting on a different fileserver from the list. We need to 341 * check it, create a callback intercept, find its address list and 342 * probe its capabilities before we use it. 343 */ 344 ASSERTCMP(fc->ac.alist, ==, NULL); 345 server = fc->server_list->servers[fc->index].server; 346 347 if (!afs_check_server_record(fc, server)) 348 goto failed; 349 350 _debug("USING SERVER: %pU", &server->uuid); 351 352 /* Make sure we've got a callback interest record for this server. We 353 * have to link it in before we send the request as we can be sent a 354 * break request before we've finished decoding the reply and 355 * installing the vnode. 356 */ 357 fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, 358 fc->index); 359 if (fc->ac.error < 0) 360 goto failed; 361 362 fc->cbi = afs_get_cb_interest(vnode->cb_interest); 363 364 read_lock(&server->fs_lock); 365 alist = rcu_dereference_protected(server->addresses, 366 lockdep_is_held(&server->fs_lock)); 367 afs_get_addrlist(alist); 368 read_unlock(&server->fs_lock); 369 370 memset(&fc->ac, 0, sizeof(fc->ac)); 371 372 /* Probe the current fileserver if we haven't done so yet. */ 373 if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { 374 fc->ac.alist = afs_get_addrlist(alist); 375 376 if (!afs_probe_fileserver(fc)) { 377 switch (fc->ac.error) { 378 case -ENOMEM: 379 case -ERESTARTSYS: 380 case -EINTR: 381 goto failed; 382 default: 383 goto next_server; 384 } 385 } 386 } 387 388 if (!fc->ac.alist) 389 fc->ac.alist = alist; 390 else 391 afs_put_addrlist(alist); 392 393 fc->ac.start = READ_ONCE(alist->index); 394 fc->ac.index = fc->ac.start; 395 396 iterate_address: 397 ASSERT(fc->ac.alist); 398 _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); 399 /* Iterate over the current server's address list to try and find an 400 * address on which it will respond to us. 401 */ 402 if (!afs_iterate_addresses(&fc->ac)) 403 goto next_server; 404 405 _leave(" = t"); 406 return true; 407 408 next_server: 409 _debug("next"); 410 afs_end_cursor(&fc->ac); 411 afs_put_cb_interest(afs_v2net(vnode), fc->cbi); 412 fc->cbi = NULL; 413 fc->index++; 414 if (fc->index >= fc->server_list->nr_servers) 415 fc->index = 0; 416 if (fc->index != fc->start) 417 goto use_server; 418 419 /* That's all the servers poked to no good effect. Try again if some 420 * of them were busy. 421 */ 422 if (fc->flags & AFS_FS_CURSOR_VBUSY) 423 goto restart_from_beginning; 424 425 fc->ac.error = -EDESTADDRREQ; 426 goto failed; 427 428 failed: 429 fc->flags |= AFS_FS_CURSOR_STOP; 430 afs_end_cursor(&fc->ac); 431 _leave(" = f [failed %d]", fc->ac.error); 432 return false; 433 } 434 435 /* 436 * Select the same fileserver we used for a vnode before and only that 437 * fileserver. We use this when we have a lock on that file, which is backed 438 * only by the fileserver we obtained it from. 439 */ 440 bool afs_select_current_fileserver(struct afs_fs_cursor *fc) 441 { 442 struct afs_vnode *vnode = fc->vnode; 443 struct afs_cb_interest *cbi = vnode->cb_interest; 444 struct afs_addr_list *alist; 445 446 _enter(""); 447 448 switch (fc->ac.error) { 449 case SHRT_MAX: 450 if (!cbi) { 451 fc->ac.error = -ESTALE; 452 fc->flags |= AFS_FS_CURSOR_STOP; 453 return false; 454 } 455 456 fc->cbi = afs_get_cb_interest(vnode->cb_interest); 457 458 read_lock(&cbi->server->fs_lock); 459 alist = rcu_dereference_protected(cbi->server->addresses, 460 lockdep_is_held(&cbi->server->fs_lock)); 461 afs_get_addrlist(alist); 462 read_unlock(&cbi->server->fs_lock); 463 if (!alist) { 464 fc->ac.error = -ESTALE; 465 fc->flags |= AFS_FS_CURSOR_STOP; 466 return false; 467 } 468 469 memset(&fc->ac, 0, sizeof(fc->ac)); 470 fc->ac.alist = alist; 471 fc->ac.start = READ_ONCE(alist->index); 472 fc->ac.index = fc->ac.start; 473 goto iterate_address; 474 475 case 0: 476 default: 477 /* Success or local failure. Stop. */ 478 fc->flags |= AFS_FS_CURSOR_STOP; 479 _leave(" = f [okay/local %d]", fc->ac.error); 480 return false; 481 482 case -ECONNABORTED: 483 fc->flags |= AFS_FS_CURSOR_STOP; 484 _leave(" = f [abort]"); 485 return false; 486 487 case -ENETUNREACH: 488 case -EHOSTUNREACH: 489 case -ECONNREFUSED: 490 case -ETIMEDOUT: 491 case -ETIME: 492 _debug("no conn"); 493 goto iterate_address; 494 } 495 496 iterate_address: 497 /* Iterate over the current server's address list to try and find an 498 * address on which it will respond to us. 499 */ 500 if (afs_iterate_addresses(&fc->ac)) { 501 _leave(" = t"); 502 return true; 503 } 504 505 afs_end_cursor(&fc->ac); 506 return false; 507 } 508 509 /* 510 * Tidy up a filesystem cursor and unlock the vnode. 511 */ 512 int afs_end_vnode_operation(struct afs_fs_cursor *fc) 513 { 514 struct afs_net *net = afs_v2net(fc->vnode); 515 int ret; 516 517 mutex_unlock(&fc->vnode->io_lock); 518 519 afs_end_cursor(&fc->ac); 520 afs_put_cb_interest(net, fc->cbi); 521 afs_put_serverlist(net, fc->server_list); 522 523 ret = fc->ac.error; 524 if (ret == -ECONNABORTED) 525 afs_abort_to_error(fc->ac.abort_code); 526 527 return fc->ac.error; 528 } 529