/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"

static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */

/*
 * Track how many server records and pending timer/work items are outstanding
 * so that the network namespace can wait for them all to be disposed of
 * before it is torn down (see afs_purge_servers()).
 */
static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
	if (atomic_dec_and_test(&net->servers_outstanding))
		wake_up_var(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
				   const struct sockaddr_rxrpc *srx)
{
	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
	const struct afs_addr_list *alist;
	struct afs_server *server = NULL;
	unsigned int i;
	bool ipv6 = true;
	int seq = 0, diff;

	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
		ipv6 = false;

	rcu_read_lock();

	do {
		if (server)
			afs_put_server(net, server);
		server = NULL;
		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

		if (ipv6) {
			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
				alist = rcu_dereference(server->addresses);
				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = ((u16 __force)a->sin6_port -
						(u16 __force)b->sin6_port);
					if (diff == 0)
						diff = memcmp(&a->sin6_addr,
							      &b->sin6_addr,
							      sizeof(struct in6_addr));
					if (diff == 0)
						goto found;
					if (diff < 0) {
						// TODO: Sort the list
						//if (i == alist->nr_ipv4)
						//	goto not_found;
						break;
					}
				}
			}
		} else {
			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
				alist = rcu_dereference(server->addresses);
				for (i = 0; i < alist->nr_ipv4; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = ((u16 __force)a->sin6_port -
						(u16 __force)b->sin6_port);
					if (diff == 0)
						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
							(u32 __force)b->sin6_addr.s6_addr32[3]);
					if (diff == 0)
						goto found;
					if (diff < 0) {
						// TODO: Sort the list
						//if (i == 0)
						//	goto not_found;
						break;
					}
				}
			}
		}

	//not_found:
		server = NULL;
	found:
		if (server && !atomic_inc_not_zero(&server->usage))
			server = NULL;

	} while (need_seqretry(&net->fs_addr_lock, seq));

	done_seqretry(&net->fs_addr_lock, seq);

	rcu_read_unlock();
	return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 0;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		if (server)
			afs_put_server(net, server);
		server = NULL;

		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				afs_get_server(server);
				break;
			}

			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
					     struct afs_server *candidate)
{
	const struct afs_addr_list *alist;
	struct afs_server *server;
	struct rb_node **pp, *p;
	int ret = -EEXIST, diff;

	_enter("%p", candidate);

	write_seqlock(&net->fs_lock);

	/* Firstly install the server in the UUID lookup tree */
	pp = &net->fs_servers.rb_node;
	p = NULL;
	while (*pp) {
		p = *pp;
		_debug("- consider %p", p);
		server = rb_entry(p, struct afs_server, uuid_rb);
		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
		if (diff < 0)
			pp = &(*pp)->rb_left;
		else if (diff > 0)
			pp = &(*pp)->rb_right;
		else
			goto exists;
	}

	server = candidate;
	rb_link_node(&server->uuid_rb, p, pp);
	rb_insert_color(&server->uuid_rb, &net->fs_servers);
	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

	write_seqlock(&net->fs_addr_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&net->fs_addr_lock.lock));

	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
	 * it in the IPv4 and/or IPv6 reverse-map lists.
	 *
	 * TODO: For speed we want to use something other than a flat list
	 * here; even sorting the list in terms of lowest address would help a
	 * bit, but anything we might want to do gets messy and memory
	 * intensive.
	 */
	if (alist->nr_ipv4 > 0)
		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
	if (alist->nr_addrs > alist->nr_ipv4)
		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

	write_sequnlock(&net->fs_addr_lock);
	ret = 0;

exists:
	afs_get_server(server);
	write_sequnlock(&net->fs_lock);
	return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
					   const uuid_t *uuid,
					   struct afs_addr_list *alist)
{
	struct afs_server *server;

	_enter("");

	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
	if (!server)
		goto enomem;

	atomic_set(&server->usage, 1);
	RCU_INIT_POINTER(server->addresses, alist);
	server->addr_version = alist->version;
	server->uuid = *uuid;
	server->flags = (1UL << AFS_SERVER_FL_NEW);
	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	rwlock_init(&server->fs_lock);
	INIT_LIST_HEAD(&server->cb_interests);
	rwlock_init(&server->cb_break_lock);

	afs_inc_servers_outstanding(net);
	_leave(" = %p", server);
	return server;

enomem:
	_leave(" = NULL [nomem]");
	return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		if (test_bit(ac.index, &ac.alist->yfs))
			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
		else
			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
		switch (ac.error) {
		case 0:
			afs_end_cursor(&ac);
			return alist;
		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			goto error;
		case -ENOMEM:
		case -ENONET:
			goto error;
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			break;
		default:
			ac.error = -EIO;
			goto error;
		}
	}

error:
	return ERR_PTR(afs_end_cursor(&ac));
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
				     const uuid_t *uuid)
{
	struct afs_addr_list *alist;
	struct afs_server *server, *candidate;

	_enter("%p,%pU", cell->net, uuid);

	server = afs_find_server_by_uuid(cell->net, uuid);
	if (server)
		return server;

	alist = afs_vl_lookup_addrs(cell, key, uuid);
	if (IS_ERR(alist))
		return ERR_CAST(alist);

	candidate = afs_alloc_server(cell->net, uuid, alist);
	if (!candidate) {
		afs_put_addrlist(alist);
		return ERR_PTR(-ENOMEM);
	}

	server = afs_install_server(cell->net, candidate);
	if (server != candidate) {
		afs_put_addrlist(alist);
		kfree(candidate);
	}

	_leave(" = %p{%d}", server, atomic_read(&server->usage));
	return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
	if (net->live) {
		afs_inc_servers_outstanding(net);
		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
			afs_dec_servers_outstanding(net);
	}
}

/*
 * Server management timer.  We have an increment on net->servers_outstanding
 * that we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

	_enter("");
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server)
{
	unsigned int usage;

	if (!server)
		return;

	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	_enter("{%u}", usage);

	if (likely(usage > 0))
		return;

	afs_set_server_timer(net, afs_server_gc_delay);
}

/*
 * Free a server record once the RCU grace period has expired and lockless
 * lookups can no longer see it.
 */
static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	afs_put_addrlist(rcu_access_pointer(server->addresses));
	kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.addr	= &alist->addrs[0],
		.start	= alist->index,
		.index	= alist->index,
		.error	= 0,
	};
	_enter("%p", server);

	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
	call_rcu(&server->rcu, afs_server_rcu);
	afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

		write_seqlock(&net->fs_lock);
		usage = 1;
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted) {
			write_seqlock(&net->fs_addr_lock);
			if (!hlist_unhashed(&server->addr4_link))
				hlist_del_rcu(&server->addr4_link);
			if (!hlist_unhashed(&server->addr6_link))
				hlist_del_rcu(&server->addr6_link);
			write_sequnlock(&net->fs_addr_lock);
			afs_destroy_server(net, server);
		}
	}
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out.  We have to pass an increment on
	 * servers_outstanding in the namespace that we are in to the timer or
	 * the work scheduler.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}

/*
 * Queue the server manager work item, passing along an increment on
 * net->servers_outstanding for it to hold while it runs.
 */
static void afs_queue_server_manager(struct afs_net *net)
{
	afs_inc_servers_outstanding(net);
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       !atomic_read(&net->servers_outstanding));
	_leave("");
}

/*
 * Probe a fileserver to find its capabilities.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	_enter("");

	fc->ac.addr = NULL;
	fc->ac.start = READ_ONCE(fc->ac.alist->index);
	fc->ac.index = fc->ac.start;
	fc->ac.error = 0;
	fc->ac.begun = false;

	while (afs_iterate_addresses(&fc->ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					&fc->ac, fc->key);
		switch (fc->ac.error) {
		case 0:
			afs_end_cursor(&fc->ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		case -ECONNABORTED:
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto error;
		case -ENOMEM:
		case -ENONET:
			goto error;
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			break;
		default:
			fc->ac.error = -EIO;
			goto error;
		}
	}

error:
	afs_end_cursor(&fc->ac);
	return false;
}

/*
 * If we haven't already, try probing the fileserver to get its capabilities.
 * We try not to instigate parallel probes, but it's possible that the parallel
 * probes will fail due to authentication failure when ours would succeed.
 *
 * TODO: Try sending an anonymous probe if an authenticated probe fails.
 */
bool afs_probe_fileserver(struct afs_fs_cursor *fc)
{
	bool success;
	int ret, retries = 0;

	_enter("");

retry:
	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
		_leave(" = t");
		return true;
	}

	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
		success = afs_do_probe_fileserver(fc);
		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
		_leave(" = t");
		return success;
	}

	_debug("wait");
	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		fc->ac.error = ret;
		_leave(" = f [%d]", ret);
		return false;
	}

	retries++;
	if (retries == 4) {
		fc->ac.error = -ESTALE;
		_leave(" = f [stale]");
		return false;
	}
	_debug("retry");
	goto retry;
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
		fc->ac.error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->ac.error);
		return false;
	}

	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	time64_t now = ktime_get_real_seconds();
	long diff;
	bool success;
	int ret, retries = 0;

	_enter("");

	ASSERT(server);

retry:
	diff = READ_ONCE(server->update_at) - now;
	if (diff > 0) {
		_leave(" = t [not now %ld]", diff);
		return true;
	}

	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
		success = afs_update_server_record(fc, server);
		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
		_leave(" = %d", success);
		return success;
	}

	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		fc->ac.error = ret;
		_leave(" = f [intr]");
		return false;
	}

	retries++;
	if (retries == 4) {
		fc->ac.error = -ESTALE;
		_leave(" = f [stale]");
		return false;
	}
	goto retry;
}