/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"

static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */

/*
 * Track the number of outstanding server records and server management work
 * items.  afs_purge_servers() waits on this counter and is woken when it
 * drops to zero.
 */
static void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
	if (atomic_dec_and_test(&net->servers_outstanding))
		wake_up_atomic_t(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
				   const struct sockaddr_rxrpc *srx)
{
	const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
	const struct afs_addr_list *alist;
	struct afs_server *server = NULL;
	unsigned int i;
	bool ipv6 = true;
	int seq = 0, diff;

	if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
	    srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
	    srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
		ipv6 = false;

	rcu_read_lock();

	do {
		if (server)
			afs_put_server(net, server);
		server = NULL;
		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

		if (ipv6) {
			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
				alist = rcu_dereference(server->addresses);
				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = (u16)a->sin6_port - (u16)b->sin6_port;
					if (diff == 0)
						diff = memcmp(&a->sin6_addr,
							      &b->sin6_addr,
							      sizeof(struct in6_addr));
					if (diff == 0)
						goto found;
					if (diff < 0) {
						// TODO: Sort the list
						//if (i == alist->nr_ipv4)
						//	goto not_found;
						break;
					}
				}
			}
		} else {
			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
				alist = rcu_dereference(server->addresses);
				for (i = 0; i < alist->nr_ipv4; i++) {
					b = &alist->addrs[i].transport.sin6;
					diff = (u16)a->sin6_port - (u16)b->sin6_port;
					if (diff == 0)
						diff = ((u32)a->sin6_addr.s6_addr32[3] -
							(u32)b->sin6_addr.s6_addr32[3]);
					if (diff == 0)
						goto found;
					if (diff < 0) {
						// TODO: Sort the list
						//if (i == 0)
						//	goto not_found;
						break;
					}
				}
			}
		}

		//not_found:
		server = NULL;
	found:
		if (server && !atomic_inc_not_zero(&server->usage))
			server = NULL;

	} while (need_seqretry(&net->fs_addr_lock, seq));

	done_seqretry(&net->fs_addr_lock, seq);

	rcu_read_unlock();
	return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
	struct afs_server *server = NULL;
	struct rb_node *p;
	int diff, seq = 0;

	_enter("%pU", uuid);

	do {
		/* Unfortunately, rbtree walking doesn't give reliable results
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		if (server)
			afs_put_server(net, server);
		server = NULL;

		read_seqbegin_or_lock(&net->fs_lock, &seq);

		p = net->fs_servers.rb_node;
		while (p) {
			server = rb_entry(p, struct afs_server, uuid_rb);

			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
			if (diff < 0) {
				p = p->rb_left;
			} else if (diff > 0) {
				p = p->rb_right;
			} else {
				afs_get_server(server);
				break;
			}

			server = NULL;
		}
	} while (need_seqretry(&net->fs_lock, seq));

	done_seqretry(&net->fs_lock, seq);

	_leave(" = %p", server);
	return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
					     struct afs_server *candidate)
{
	const struct afs_addr_list *alist;
	struct afs_server *server;
	struct rb_node **pp, *p;
	int ret = -EEXIST, diff;

	_enter("%p", candidate);

	write_seqlock(&net->fs_lock);

	/* Firstly install the server in the UUID lookup tree */
	pp = &net->fs_servers.rb_node;
	p = NULL;
	while (*pp) {
		p = *pp;
		_debug("- consider %p", p);
		server = rb_entry(p, struct afs_server, uuid_rb);
		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
		if (diff < 0)
			pp = &(*pp)->rb_left;
		else if (diff > 0)
			pp = &(*pp)->rb_right;
		else
			goto exists;
	}

	server = candidate;
	rb_link_node(&server->uuid_rb, p, pp);
	rb_insert_color(&server->uuid_rb, &net->fs_servers);
	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

	write_seqlock(&net->fs_addr_lock);
	alist = rcu_dereference_protected(server->addresses,
					  lockdep_is_held(&net->fs_addr_lock.lock));

	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
	 * it in the IPv4 and/or IPv6 reverse-map lists.
	 *
	 * TODO: For speed we want to use something other than a flat list
	 * here; even sorting the list in terms of lowest address would help a
	 * bit, but anything we might want to do gets messy and memory
	 * intensive.
	 */
	if (alist->nr_ipv4 > 0)
		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
	if (alist->nr_addrs > alist->nr_ipv4)
		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

	write_sequnlock(&net->fs_addr_lock);
	ret = 0;

exists:
	afs_get_server(server);
	write_sequnlock(&net->fs_lock);
	return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
					   const uuid_t *uuid,
					   struct afs_addr_list *alist)
{
	struct afs_server *server;

	_enter("");

	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
	if (!server)
		goto enomem;

	atomic_set(&server->usage, 1);
	RCU_INIT_POINTER(server->addresses, alist);
	server->addr_version = alist->version;
	server->uuid = *uuid;
	server->flags = (1UL << AFS_SERVER_FL_NEW);
	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	rwlock_init(&server->fs_lock);
	INIT_LIST_HEAD(&server->cb_interests);
	rwlock_init(&server->cb_break_lock);

	afs_inc_servers_outstanding(net);
	_leave(" = %p", server);
	return server;

enomem:
	_leave(" = NULL [nomem]");
	return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		if (test_bit(ac.index, &ac.alist->yfs))
			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
		else
			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
		switch (ac.error) {
		case 0:
			afs_end_cursor(&ac);
			return alist;
		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			goto error;
		case -ENOMEM:
		case -ENONET:
			goto error;
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			break;
		default:
			ac.error = -EIO;
			goto error;
		}
	}

error:
	return ERR_PTR(afs_end_cursor(&ac));
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
				     const uuid_t *uuid)
{
	struct afs_addr_list *alist;
	struct afs_server *server, *candidate;

	_enter("%p,%pU", cell->net, uuid);

	server = afs_find_server_by_uuid(cell->net, uuid);
	if (server)
		return server;

	alist = afs_vl_lookup_addrs(cell, key, uuid);
	if (IS_ERR(alist))
		return ERR_CAST(alist);

	candidate = afs_alloc_server(cell->net, uuid, alist);
	if (!candidate) {
		afs_put_addrlist(alist);
		return ERR_PTR(-ENOMEM);
	}

	server = afs_install_server(cell->net, candidate);
	if (server != candidate) {
		afs_put_addrlist(alist);
		kfree(candidate);
	}

	_leave(" = %p{%d}", server, atomic_read(&server->usage));
	return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
	if (net->live) {
		afs_inc_servers_outstanding(net);
		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
			afs_dec_servers_outstanding(net);
	}
}

/*
 * Server management timer.  We have an increment on servers_outstanding that
 * we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

	_enter("");
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server)
{
	unsigned int usage;

	if (!server)
		return;

	server->put_time = ktime_get_real_seconds();

	usage = atomic_dec_return(&server->usage);

	_enter("{%u}", usage);

	if (likely(usage > 0))
		return;

	afs_set_server_timer(net, afs_server_gc_delay);
}

/*
 * Free a server record once an RCU grace period has passed.
 */
static void afs_server_rcu(struct rcu_head *rcu)
{
	struct afs_server *server = container_of(rcu, struct afs_server, rcu);

	afs_put_addrlist(server->addresses);
	kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
	struct afs_addr_list *alist = server->addresses;
	struct afs_addr_cursor ac = {
		.alist	= alist,
		.addr	= &alist->addrs[0],
		.start	= alist->index,
		.index	= alist->index,
		.error	= 0,
	};
	_enter("%p", server);

	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
	call_rcu(&server->rcu, afs_server_rcu);
	afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
	struct afs_server *server;
	bool deleted;
	int usage;

	while ((server = gc_list)) {
		gc_list = server->gc_next;

		write_seqlock(&net->fs_lock);
		usage = 1;
		deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
		if (deleted) {
			rb_erase(&server->uuid_rb, &net->fs_servers);
			hlist_del_rcu(&server->proc_link);
		}
		write_sequnlock(&net->fs_lock);

		if (deleted)
			afs_destroy_server(net, server);
	}
}

/*
 * Manage the records of servers known to be within a network namespace. This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
	struct afs_server *gc_list = NULL;
	struct rb_node *cursor;
	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
	bool purging = !net->live;

	_enter("");

	/* Trawl the server list looking for servers that have expired from
	 * lack of use.
	 */
	read_seqlock_excl(&net->fs_lock);

	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
		struct afs_server *server =
			rb_entry(cursor, struct afs_server, uuid_rb);
		int usage = atomic_read(&server->usage);

		_debug("manage %pU %u", &server->uuid, usage);

		ASSERTCMP(usage, >=, 1);
		ASSERTIFCMP(purging, usage, ==, 1);

		if (usage == 1) {
			time64_t expire_at = server->put_time;

			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
				expire_at += afs_server_gc_delay;
			if (purging || expire_at <= now) {
				server->gc_next = gc_list;
				gc_list = server;
			} else if (expire_at < next_manage) {
				next_manage = expire_at;
			}
		}
	}

	read_sequnlock_excl(&net->fs_lock);

	/* Update the timer on the way out. We have to pass an increment on
	 * servers_outstanding in the namespace that we are in to the timer or
	 * the work scheduler.
	 */
	if (!purging && next_manage < TIME64_MAX) {
		now = ktime_get_real_seconds();

		if (next_manage - now <= 0) {
			if (queue_work(afs_wq, &net->fs_manager))
				afs_inc_servers_outstanding(net);
		} else {
			afs_set_server_timer(net, next_manage - now);
		}
	}

	afs_gc_servers(net, gc_list);

	afs_dec_servers_outstanding(net);
	_leave(" [%d]", atomic_read(&net->servers_outstanding));
}

/*
 * Queue the server manager work item, passing a count on servers_outstanding
 * to it; drop the count again if the work item was already queued.
 */
static void afs_queue_server_manager(struct afs_net *net)
{
	afs_inc_servers_outstanding(net);
	if (!queue_work(afs_wq, &net->fs_manager))
		afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	_debug("wait");
	wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
			 TASK_UNINTERRUPTIBLE);
	_leave("");
}

/*
 * Probe a fileserver to find its capabilities.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	_enter("");

	fc->ac.addr = NULL;
	fc->ac.start = READ_ONCE(fc->ac.alist->index);
	fc->ac.index = fc->ac.start;
	fc->ac.error = 0;
	fc->ac.begun = false;

	while (afs_iterate_addresses(&fc->ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					&fc->ac, fc->key);
		switch (fc->ac.error) {
		case 0:
			afs_end_cursor(&fc->ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		case -ECONNABORTED:
			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
			goto error;
		case -ENOMEM:
		case -ENONET:
			goto error;
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			break;
		default:
			fc->ac.error = -EIO;
			goto error;
		}
	}

error:
	afs_end_cursor(&fc->ac);
	return false;
}

/*
 * If we haven't already, try probing the fileserver to get its capabilities.
 * We try not to instigate parallel probes, but it's possible that the parallel
 * probes will fail due to authentication failure when ours would succeed.
 *
 * TODO: Try sending an anonymous probe if an authenticated probe fails.
 */
bool afs_probe_fileserver(struct afs_fs_cursor *fc)
{
	bool success;
	int ret, retries = 0;

	_enter("");

retry:
	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
		_leave(" = t");
		return true;
	}

	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
		success = afs_do_probe_fileserver(fc);
		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
		_leave(" = t");
		return success;
	}

	_debug("wait");
	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		fc->ac.error = ret;
		_leave(" = f [%d]", ret);
		return false;
	}

	retries++;
	if (retries == 4) {
		fc->ac.error = -ESTALE;
		_leave(" = f [stale]");
		return false;
	}
	_debug("retry");
	goto retry;
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	struct afs_addr_list *alist, *discard;

	_enter("");

	alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
				    &server->uuid);
	if (IS_ERR(alist)) {
		fc->ac.error = PTR_ERR(alist);
		_leave(" = f [%d]", fc->ac.error);
		return false;
	}

	discard = alist;
	if (server->addr_version != alist->version) {
		write_lock(&server->fs_lock);
		discard = rcu_dereference_protected(server->addresses,
						    lockdep_is_held(&server->fs_lock));
		rcu_assign_pointer(server->addresses, alist);
		server->addr_version = alist->version;
		write_unlock(&server->fs_lock);
	}

	server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
	afs_put_addrlist(discard);
	_leave(" = t");
	return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
	time64_t now = ktime_get_real_seconds();
	long diff;
	bool success;
	int ret, retries = 0;

	_enter("");

	ASSERT(server);

retry:
	diff = READ_ONCE(server->update_at) - now;
	if (diff > 0) {
		_leave(" = t [not now %ld]", diff);
		return true;
	}

	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
		success = afs_update_server_record(fc, server);
		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
		_leave(" = %d", success);
		return success;
	}

	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
			  TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		fc->ac.error = ret;
		_leave(" = f [intr]");
		return false;
	}

	retries++;
	if (retries == 4) {
		_leave(" = f [stale]");
		ret = -ESTALE;
		return false;
	}
	goto retry;
}