1 /* Handle fileserver selection and rotation. 2 * 3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public Licence 8 * as published by the Free Software Foundation; either version 9 * 2 of the Licence, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/slab.h> 14 #include <linux/fs.h> 15 #include <linux/sched.h> 16 #include <linux/delay.h> 17 #include <linux/sched/signal.h> 18 #include "internal.h" 19 #include "afs_fs.h" 20 21 /* 22 * Initialise a filesystem server cursor for iterating over FS servers. 23 */ 24 void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) 25 { 26 memset(fc, 0, sizeof(*fc)); 27 } 28 29 /* 30 * Begin an operation on the fileserver. 31 * 32 * Fileserver operations are serialised on the server by vnode, so we serialise 33 * them here also using the io_lock. 34 */ 35 bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, 36 struct key *key) 37 { 38 afs_init_fs_cursor(fc, vnode); 39 fc->vnode = vnode; 40 fc->key = key; 41 fc->ac.error = SHRT_MAX; 42 43 if (mutex_lock_interruptible(&vnode->io_lock) < 0) { 44 fc->ac.error = -EINTR; 45 fc->flags |= AFS_FS_CURSOR_STOP; 46 return false; 47 } 48 49 if (vnode->lock_state != AFS_VNODE_LOCK_NONE) 50 fc->flags |= AFS_FS_CURSOR_CUR_ONLY; 51 return true; 52 } 53 54 /* 55 * Begin iteration through a server list, starting with the vnode's last used 56 * server if possible, or the last recorded good server if not. 
57 */ 58 static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, 59 struct afs_vnode *vnode) 60 { 61 struct afs_cb_interest *cbi; 62 int i; 63 64 read_lock(&vnode->volume->servers_lock); 65 fc->server_list = afs_get_serverlist(vnode->volume->servers); 66 read_unlock(&vnode->volume->servers_lock); 67 68 cbi = vnode->cb_interest; 69 if (cbi) { 70 /* See if the vnode's preferred record is still available */ 71 for (i = 0; i < fc->server_list->nr_servers; i++) { 72 if (fc->server_list->servers[i].cb_interest == cbi) { 73 fc->start = i; 74 goto found_interest; 75 } 76 } 77 78 /* If we have a lock outstanding on a server that's no longer 79 * serving this vnode, then we can't switch to another server 80 * and have to return an error. 81 */ 82 if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { 83 fc->ac.error = -ESTALE; 84 return false; 85 } 86 87 /* Note that the callback promise is effectively broken */ 88 write_seqlock(&vnode->cb_lock); 89 ASSERTCMP(cbi, ==, vnode->cb_interest); 90 vnode->cb_interest = NULL; 91 if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) 92 vnode->cb_break++; 93 write_sequnlock(&vnode->cb_lock); 94 95 afs_put_cb_interest(afs_v2net(vnode), cbi); 96 cbi = NULL; 97 } else { 98 fc->start = READ_ONCE(fc->server_list->index); 99 } 100 101 found_interest: 102 fc->index = fc->start; 103 return true; 104 } 105 106 /* 107 * Post volume busy note. 108 */ 109 static void afs_busy(struct afs_volume *volume, u32 abort_code) 110 { 111 const char *m; 112 113 switch (abort_code) { 114 case VOFFLINE: m = "offline"; break; 115 case VRESTARTING: m = "restarting"; break; 116 case VSALVAGING: m = "being salvaged"; break; 117 default: m = "busy"; break; 118 } 119 120 pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); 121 } 122 123 /* 124 * Sleep and retry the operation to the same fileserver. 
 *
 * Sleeps via msleep_interruptible(1000).  Returns true if the caller should
 * go ahead and retry.  Returns false, with fc->ac.error set to -ERESTARTSYS,
 * if a signal is pending for the current task after the sleep.
 */
static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
{
	msleep_interruptible(1000);
	if (signal_pending(current)) {
		fc->ac.error = -ERESTARTSYS;
		return false;
	}

	return true;
}

/*
 * Select the fileserver to use.  May be called multiple times to rotate
 * through the fileservers.
 *
 * Each call first inspects the outcome of the previous attempt as recorded
 * in fc->ac.error (and fc->ac.abort_code for -ECONNABORTED):
 *
 *  - SHRT_MAX: nothing has been tried yet; start iterating.
 *  - 0 or any error not listed below: stop (success or local failure).
 *  - -ECONNABORTED: act on the abort code (retry, move to the next server,
 *    restart with a fresh server list, or fail).
 *  - -ENETUNREACH, -EHOSTUNREACH, -ECONNREFUSED, -ETIMEDOUT, -ETIME: try the
 *    next address of the current server.
 *
 * Returns true if the caller should (re)issue the operation using the cursor.
 * Returns false once iteration is over, in which case AFS_FS_CURSOR_STOP is
 * set and fc->ac.error holds the final result.
 */
bool afs_select_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_list *alist;
	struct afs_server *server;
	struct afs_vnode *vnode = fc->vnode;

	_enter("%u/%u,%u/%u,%d,%d",
	       fc->index, fc->start,
	       fc->ac.index, fc->ac.start,
	       fc->ac.error, fc->ac.abort_code);

	/* A previous call already decided that iteration is over. */
	if (fc->flags & AFS_FS_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* Evaluate the result of the previous operation, if there was one. */
	switch (fc->ac.error) {
	case SHRT_MAX:
		/* Sentinel meaning that no operation has been attempted yet. */
		goto start;

	case 0:
	default:
		/* Success or local failure.  Stop. */
		fc->flags |= AFS_FS_CURSOR_STOP;
		_leave(" = f [okay/local %d]", fc->ac.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 *
		 * Every arm of the inner switch leaves via goto or return, so
		 * control never falls out of it into the cases below.
		 */
		switch (fc->ac.abort_code) {
		case VNOVOL:
			/* This fileserver doesn't know about the volume.
			 * - May indicate that the VL is wrong - retry once and compare
			 *   the results.
			 * - May indicate that the fileserver couldn't attach to the vol.
			 */
			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
				/* Already retried once for VNOVOL; give up. */
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}

			/* Record in the server list that this slot returned
			 * VNOVOL.
			 * NOTE(review): '1 << fc->index' is a signed int shift;
			 * confirm the width of vnovol_mask and the maximum
			 * server count make this safe.
			 */
			write_lock(&vnode->volume->servers_lock);
			fc->server_list->vnovol_mask |= 1 << fc->index;
			write_unlock(&vnode->volume->servers_lock);

			/* Flag the volume as needing an update, then recheck it. */
			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
			if (fc->ac.error < 0)
				goto failed;

			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
				fc->ac.error = -ENOMEDIUM;
				goto failed;
			}

			/* If the server list didn't change, then assume that
			 * it's the fileserver having trouble.
			 */
			if (vnode->volume->servers == fc->server_list) {
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}

			/* Try again */
			fc->flags |= AFS_FS_CURSOR_VNOVOL;
			_leave(" = t [vnovol]");
			return true;

		case VSALVAGE: /* TODO: Should this return an error or iterate? */
		case VVOLEXISTS:
		case VNOSERVICE:
		case VONLINE:
		case VDISKFULL:
		case VOVERQUOTA:
			/* Record the translated error and move on to the next
			 * server in the list.
			 */
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto next_server;

		case VOFFLINE:
			/* Log the state change only on the transition into
			 * "offline", and drop the "busy" marker when we do.
			 */
			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
				afs_busy(vnode->volume, fc->ac.abort_code);
				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
			}
			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
				fc->ac.error = -EADV;
				goto failed;
			}
			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
				fc->ac.error = -ESTALE;
				goto failed;
			}
			goto busy;

		case VSALVAGING:
		case VRESTARTING:
		case VBUSY:
			/* Retry after going round all the servers unless we
			 * have a file lock we need to maintain.
			 */
			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
				fc->ac.error = -EBUSY;
				goto failed;
			}
			/* Log only on the transition into "busy", and drop the
			 * "offline" marker when we do.
			 */
			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
				afs_busy(vnode->volume, fc->ac.abort_code);
				clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
			}
		busy:
			/* The VOFFLINE arm only jumps here when CUR_ONLY is
			 * clear, so the sleep-and-retry branch is only taken
			 * for the busy/salvaging/restarting codes.
			 */
			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
				if (!afs_sleep_and_retry(fc))
					goto failed;

				 /* Retry with same server & address */
				_leave(" = t [vbusy]");
				return true;
			}

			/* Remember that at least one server was busy so that
			 * the whole list is retried once it is exhausted.
			 */
			fc->flags |= AFS_FS_CURSOR_VBUSY;
			goto next_server;

		case VMOVED:
			/* The volume migrated to another server.  We
			 * consider all locks and callbacks broken and request
			 * an update from the VLDB.
			 *
			 * We also limit the number of VMOVED hops we will
			 * honour, just in case someone sets up a loop.
			 */
			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
				fc->ac.error = -EREMOTEIO;
				goto failed;
			}
			fc->flags |= AFS_FS_CURSOR_VMOVED;

			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
			if (fc->ac.error < 0)
				goto failed;

			/* If the server list didn't change, then the VLDB is
			 * out of sync with the fileservers.  This is hopefully
			 * a temporary condition, however, so we don't want to
			 * permanently block access to the file.
			 *
			 * TODO: Try other fileservers if we can.
			 *
			 * TODO: Retry a few times with sleeps.
			 */
			if (vnode->volume->servers == fc->server_list) {
				fc->ac.error = -ENOMEDIUM;
				goto failed;
			}

			goto restart_from_beginning;

		default:
			/* Any other abort code: clear both volume state
			 * markers and fail with the translated error.
			 */
			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto failed;
		}

	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Could not reach this address; try the server's next one. */
		_debug("no conn");
		goto iterate_address;
	}

restart_from_beginning:
	/* Drop everything tied to the current server list before fetching a
	 * fresh one.
	 */
	_debug("restart");
	afs_end_cursor(&fc->ac);
	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
	fc->cbi = NULL;
	afs_put_serverlist(afs_v2net(vnode), fc->server_list);
	fc->server_list = NULL;
start:
	_debug("start");
	/* See if we need to do an update of the volume record.  Note that the
	 * volume may have moved or even have been deleted.
	 */
	fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
	if (fc->ac.error < 0)
		goto failed;

	if (!afs_start_fs_iteration(fc, vnode))
		goto failed;

use_server:
	_debug("use");
	/* We're starting on a different fileserver from the list.  We need to
	 * check it, create a callback intercept, find its address list and
	 * probe its capabilities before we use it.
	 */
	ASSERTCMP(fc->ac.alist, ==, NULL);
	server = fc->server_list->servers[fc->index].server;

	if (!afs_check_server_record(fc, server))
		goto failed;

	_debug("USING SERVER: %pU", &server->uuid);

	/* Make sure we've got a callback interest record for this server.  We
	 * have to link it in before we send the request as we can be sent a
	 * break request before we've finished decoding the reply and
	 * installing the vnode.
	 */
	fc->ac.error = afs_register_server_cb_interest(
		vnode, &fc->server_list->servers[fc->index]);
	if (fc->ac.error < 0)
		goto failed;

	fc->cbi = afs_get_cb_interest(vnode->cb_interest);

	/* Pick up the server's current address list under fs_lock. */
	read_lock(&server->fs_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&server->fs_lock));
	afs_get_addrlist(alist);
	read_unlock(&server->fs_lock);

	/* Reset the address cursor; this also clears the previous error. */
	memset(&fc->ac, 0, sizeof(fc->ac));

	/* Probe the current fileserver if we haven't done so yet. */
	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
		fc->ac.alist = afs_get_addrlist(alist);

		/* NOTE(review): on this failure path the afs_get_addrlist()
		 * call made on 'alist' under fs_lock above has no matching
		 * afs_put_addrlist() in this function - confirm whether that
		 * is accounted for elsewhere or is a leak.
		 */
		if (!afs_probe_fileserver(fc))
			goto failed;
	}

	/* Install the list in the cursor if it has none at this point;
	 * otherwise release the extra hold on 'alist'.
	 */
	if (!fc->ac.alist)
		fc->ac.alist = alist;
	else
		afs_put_addrlist(alist);

	fc->ac.start = READ_ONCE(alist->index);
	fc->ac.index = fc->ac.start;

iterate_address:
	ASSERT(fc->ac.alist);
	_debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	if (!afs_iterate_addresses(&fc->ac))
		goto next_server;

	_leave(" = t");
	return true;

next_server:
	_debug("next");
	afs_end_cursor(&fc->ac);
	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
	fc->cbi = NULL;
	/* Advance circularly through the list until back at the start slot. */
	fc->index++;
	if (fc->index >= fc->server_list->nr_servers)
		fc->index = 0;
	if (fc->index != fc->start)
		goto use_server;

	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 *
	 * NOTE(review): AFS_FS_CURSOR_VBUSY is not cleared anywhere in this
	 * function and no sleep happens on this path - confirm that repeated
	 * busy replies cannot make this loop spin.
	 */
	if (fc->flags & AFS_FS_CURSOR_VBUSY)
		goto restart_from_beginning;

	fc->ac.error = -EDESTADDRREQ;
	goto failed;

failed:
	/* Terminal failure: fc->ac.error has been set by whoever jumped here. */
	fc->flags |= AFS_FS_CURSOR_STOP;
	afs_end_cursor(&fc->ac);
	_leave(" = f [failed %d]", fc->ac.error);
	return false;
}

/*
 * Select the same fileserver we used for a vnode before and only that
 * fileserver.
We use this when we have a lock on that file, which is backed 426 * only by the fileserver we obtained it from. 427 */ 428 bool afs_select_current_fileserver(struct afs_fs_cursor *fc) 429 { 430 struct afs_vnode *vnode = fc->vnode; 431 struct afs_cb_interest *cbi = vnode->cb_interest; 432 struct afs_addr_list *alist; 433 434 _enter(""); 435 436 switch (fc->ac.error) { 437 case SHRT_MAX: 438 if (!cbi) { 439 fc->ac.error = -ESTALE; 440 fc->flags |= AFS_FS_CURSOR_STOP; 441 return false; 442 } 443 444 fc->cbi = afs_get_cb_interest(vnode->cb_interest); 445 446 read_lock(&cbi->server->fs_lock); 447 alist = rcu_dereference_protected(cbi->server->addresses, 448 lockdep_is_held(&cbi->server->fs_lock)); 449 afs_get_addrlist(alist); 450 read_unlock(&cbi->server->fs_lock); 451 if (!alist) { 452 fc->ac.error = -ESTALE; 453 fc->flags |= AFS_FS_CURSOR_STOP; 454 return false; 455 } 456 457 memset(&fc->ac, 0, sizeof(fc->ac)); 458 fc->ac.alist = alist; 459 fc->ac.start = READ_ONCE(alist->index); 460 fc->ac.index = fc->ac.start; 461 goto iterate_address; 462 463 case 0: 464 default: 465 /* Success or local failure. Stop. */ 466 fc->flags |= AFS_FS_CURSOR_STOP; 467 _leave(" = f [okay/local %d]", fc->ac.error); 468 return false; 469 470 case -ECONNABORTED: 471 fc->flags |= AFS_FS_CURSOR_STOP; 472 _leave(" = f [abort]"); 473 return false; 474 475 case -ENETUNREACH: 476 case -EHOSTUNREACH: 477 case -ECONNREFUSED: 478 case -ETIMEDOUT: 479 case -ETIME: 480 _debug("no conn"); 481 goto iterate_address; 482 } 483 484 iterate_address: 485 /* Iterate over the current server's address list to try and find an 486 * address on which it will respond to us. 487 */ 488 if (afs_iterate_addresses(&fc->ac)) { 489 _leave(" = t"); 490 return true; 491 } 492 493 afs_end_cursor(&fc->ac); 494 return false; 495 } 496 497 /* 498 * Tidy up a filesystem cursor and unlock the vnode. 
499 */ 500 int afs_end_vnode_operation(struct afs_fs_cursor *fc) 501 { 502 struct afs_net *net = afs_v2net(fc->vnode); 503 int ret; 504 505 mutex_unlock(&fc->vnode->io_lock); 506 507 afs_end_cursor(&fc->ac); 508 afs_put_cb_interest(net, fc->cbi); 509 afs_put_serverlist(net, fc->server_list); 510 511 ret = fc->ac.error; 512 if (ret == -ECONNABORTED) 513 afs_abort_to_error(fc->ac.abort_code); 514 515 return fc->ac.error; 516 } 517