1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS server record management 3 * 4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/sched.h> 9 #include <linux/slab.h> 10 #include "afs_fs.h" 11 #include "internal.h" 12 #include "protocol_yfs.h" 13 14 static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ 15 static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ 16 17 static void afs_inc_servers_outstanding(struct afs_net *net) 18 { 19 atomic_inc(&net->servers_outstanding); 20 } 21 22 static void afs_dec_servers_outstanding(struct afs_net *net) 23 { 24 if (atomic_dec_and_test(&net->servers_outstanding)) 25 wake_up_var(&net->servers_outstanding); 26 } 27 28 /* 29 * Find a server by one of its addresses. 30 */ 31 struct afs_server *afs_find_server(struct afs_net *net, 32 const struct sockaddr_rxrpc *srx) 33 { 34 const struct sockaddr_in6 *a = &srx->transport.sin6, *b; 35 const struct afs_addr_list *alist; 36 struct afs_server *server = NULL; 37 unsigned int i; 38 bool ipv6 = true; 39 int seq = 0, diff; 40 41 if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 || 42 srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 || 43 srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) 44 ipv6 = false; 45 46 rcu_read_lock(); 47 48 do { 49 if (server) 50 afs_put_server(net, server); 51 server = NULL; 52 read_seqbegin_or_lock(&net->fs_addr_lock, &seq); 53 54 if (ipv6) { 55 hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { 56 alist = rcu_dereference(server->addresses); 57 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { 58 b = &alist->addrs[i].transport.sin6; 59 diff = ((u16 __force)a->sin6_port - 60 (u16 __force)b->sin6_port); 61 if (diff == 0) 62 diff = memcmp(&a->sin6_addr, 63 &b->sin6_addr, 64 sizeof(struct in6_addr)); 65 if (diff == 0) 66 goto found; 67 } 68 } 69 } else { 70 hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { 71 alist = rcu_dereference(server->addresses); 72 for (i = 0; i < alist->nr_ipv4; i++) { 73 b = &alist->addrs[i].transport.sin6; 74 diff = ((u16 __force)a->sin6_port - 75 (u16 __force)b->sin6_port); 76 if (diff == 0) 77 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] - 78 (u32 __force)b->sin6_addr.s6_addr32[3]); 79 if (diff == 0) 80 goto found; 81 } 82 } 83 } 84 85 server = NULL; 86 found: 87 if (server && !atomic_inc_not_zero(&server->usage)) 88 server = NULL; 89 90 } while (need_seqretry(&net->fs_addr_lock, seq)); 91 92 done_seqretry(&net->fs_addr_lock, seq); 93 94 rcu_read_unlock(); 95 return server; 96 } 97 98 /* 99 * Look up a server by its UUID 100 */ 101 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) 102 { 103 struct afs_server *server = NULL; 104 struct rb_node *p; 105 int diff, seq = 0; 106 107 _enter("%pU", uuid); 108 109 do { 110 /* Unfortunately, rbtree walking doesn't give reliable results 111 * under just the RCU read lock, so we have to check for 112 * changes. 113 */ 114 if (server) 115 afs_put_server(net, server); 116 server = NULL; 117 118 read_seqbegin_or_lock(&net->fs_lock, &seq); 119 120 p = net->fs_servers.rb_node; 121 while (p) { 122 server = rb_entry(p, struct afs_server, uuid_rb); 123 124 diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); 125 if (diff < 0) { 126 p = p->rb_left; 127 } else if (diff > 0) { 128 p = p->rb_right; 129 } else { 130 afs_get_server(server); 131 break; 132 } 133 134 server = NULL; 135 } 136 } while (need_seqretry(&net->fs_lock, seq)); 137 138 done_seqretry(&net->fs_lock, seq); 139 140 _leave(" = %p", server); 141 return server; 142 } 143 144 /* 145 * Install a server record in the namespace tree 146 */ 147 static struct afs_server *afs_install_server(struct afs_net *net, 148 struct afs_server *candidate) 149 { 150 const struct afs_addr_list *alist; 151 struct afs_server *server; 152 struct rb_node **pp, *p; 153 int ret = -EEXIST, diff; 154 155 _enter("%p", candidate); 156 157 write_seqlock(&net->fs_lock); 158 159 /* Firstly install the server in the UUID lookup tree */ 160 pp = &net->fs_servers.rb_node; 161 p = NULL; 162 while (*pp) { 163 p = *pp; 164 _debug("- consider %p", p); 165 server = rb_entry(p, struct afs_server, uuid_rb); 166 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); 167 if (diff < 0) 168 pp = &(*pp)->rb_left; 169 else if (diff > 0) 170 pp = &(*pp)->rb_right; 171 else 172 goto exists; 173 } 174 175 server = candidate; 176 rb_link_node(&server->uuid_rb, p, pp); 177 rb_insert_color(&server->uuid_rb, &net->fs_servers); 178 hlist_add_head_rcu(&server->proc_link, &net->fs_proc); 179 180 write_seqlock(&net->fs_addr_lock); 181 alist = rcu_dereference_protected(server->addresses, 182 lockdep_is_held(&net->fs_addr_lock.lock)); 183 184 /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install 185 * it in the IPv4 and/or IPv6 reverse-map lists. 186 * 187 * TODO: For speed we want to use something other than a flat list 188 * here; even sorting the list in terms of lowest address would help a 189 * bit, but anything we might want to do gets messy and memory 190 * intensive. 191 */ 192 if (alist->nr_ipv4 > 0) 193 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); 194 if (alist->nr_addrs > alist->nr_ipv4) 195 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); 196 197 write_sequnlock(&net->fs_addr_lock); 198 ret = 0; 199 200 exists: 201 afs_get_server(server); 202 write_sequnlock(&net->fs_lock); 203 return server; 204 } 205 206 /* 207 * allocate a new server record 208 */ 209 static struct afs_server *afs_alloc_server(struct afs_net *net, 210 const uuid_t *uuid, 211 struct afs_addr_list *alist) 212 { 213 struct afs_server *server; 214 215 _enter(""); 216 217 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); 218 if (!server) 219 goto enomem; 220 221 atomic_set(&server->usage, 1); 222 RCU_INIT_POINTER(server->addresses, alist); 223 server->addr_version = alist->version; 224 server->uuid = *uuid; 225 server->update_at = ktime_get_real_seconds() + afs_server_update_delay; 226 rwlock_init(&server->fs_lock); 227 INIT_HLIST_HEAD(&server->cb_volumes); 228 rwlock_init(&server->cb_break_lock); 229 init_waitqueue_head(&server->probe_wq); 230 spin_lock_init(&server->probe_lock); 231 232 afs_inc_servers_outstanding(net); 233 _leave(" = %p", server); 234 return server; 235 236 enomem: 237 _leave(" = NULL [nomem]"); 238 return NULL; 239 } 240 241 /* 242 * Look up an address record for a server 243 */ 244 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, 245 struct key *key, const uuid_t *uuid) 246 { 247 struct afs_vl_cursor vc; 248 struct afs_addr_list *alist = NULL; 249 int ret; 250 251 ret = -ERESTARTSYS; 252 if (afs_begin_vlserver_operation(&vc, cell, key)) { 253 while (afs_select_vlserver(&vc)) { 254 if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) 255 alist = afs_yfsvl_get_endpoints(&vc, uuid); 256 else 257 alist = afs_vl_get_addrs_u(&vc, uuid); 258 } 259 260 ret = afs_end_vlserver_operation(&vc); 261 } 262 263 return ret < 0 ? ERR_PTR(ret) : alist; 264 } 265 266 /* 267 * Get or create a fileserver record. 268 */ 269 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, 270 const uuid_t *uuid) 271 { 272 struct afs_addr_list *alist; 273 struct afs_server *server, *candidate; 274 275 _enter("%p,%pU", cell->net, uuid); 276 277 server = afs_find_server_by_uuid(cell->net, uuid); 278 if (server) 279 return server; 280 281 alist = afs_vl_lookup_addrs(cell, key, uuid); 282 if (IS_ERR(alist)) 283 return ERR_CAST(alist); 284 285 candidate = afs_alloc_server(cell->net, uuid, alist); 286 if (!candidate) { 287 afs_put_addrlist(alist); 288 return ERR_PTR(-ENOMEM); 289 } 290 291 server = afs_install_server(cell->net, candidate); 292 if (server != candidate) { 293 afs_put_addrlist(alist); 294 kfree(candidate); 295 } 296 297 _leave(" = %p{%d}", server, atomic_read(&server->usage)); 298 return server; 299 } 300 301 /* 302 * Set the server timer to fire after a given delay, assuming it's not already 303 * set for an earlier time. 304 */ 305 static void afs_set_server_timer(struct afs_net *net, time64_t delay) 306 { 307 if (net->live) { 308 afs_inc_servers_outstanding(net); 309 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) 310 afs_dec_servers_outstanding(net); 311 } 312 } 313 314 /* 315 * Server management timer. We have an increment on fs_outstanding that we 316 * need to pass along to the work item. 317 */ 318 void afs_servers_timer(struct timer_list *timer) 319 { 320 struct afs_net *net = container_of(timer, struct afs_net, fs_timer); 321 322 _enter(""); 323 if (!queue_work(afs_wq, &net->fs_manager)) 324 afs_dec_servers_outstanding(net); 325 } 326 327 /* 328 * Release a reference on a server record. 329 */ 330 void afs_put_server(struct afs_net *net, struct afs_server *server) 331 { 332 unsigned int usage; 333 334 if (!server) 335 return; 336 337 server->put_time = ktime_get_real_seconds(); 338 339 usage = atomic_dec_return(&server->usage); 340 341 _enter("{%u}", usage); 342 343 if (likely(usage > 0)) 344 return; 345 346 afs_set_server_timer(net, afs_server_gc_delay); 347 } 348 349 static void afs_server_rcu(struct rcu_head *rcu) 350 { 351 struct afs_server *server = container_of(rcu, struct afs_server, rcu); 352 353 afs_put_addrlist(rcu_access_pointer(server->addresses)); 354 kfree(server); 355 } 356 357 /* 358 * destroy a dead server 359 */ 360 static void afs_destroy_server(struct afs_net *net, struct afs_server *server) 361 { 362 struct afs_addr_list *alist = rcu_access_pointer(server->addresses); 363 struct afs_addr_cursor ac = { 364 .alist = alist, 365 .index = alist->preferred, 366 .error = 0, 367 }; 368 _enter("%p", server); 369 370 if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) 371 afs_fs_give_up_all_callbacks(net, server, &ac, NULL); 372 373 wait_var_event(&server->probe_outstanding, 374 atomic_read(&server->probe_outstanding) == 0); 375 376 call_rcu(&server->rcu, afs_server_rcu); 377 afs_dec_servers_outstanding(net); 378 } 379 380 /* 381 * Garbage collect any expired servers. 382 */ 383 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) 384 { 385 struct afs_server *server; 386 bool deleted; 387 int usage; 388 389 while ((server = gc_list)) { 390 gc_list = server->gc_next; 391 392 write_seqlock(&net->fs_lock); 393 usage = 1; 394 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); 395 if (deleted) { 396 rb_erase(&server->uuid_rb, &net->fs_servers); 397 hlist_del_rcu(&server->proc_link); 398 } 399 write_sequnlock(&net->fs_lock); 400 401 if (deleted) { 402 write_seqlock(&net->fs_addr_lock); 403 if (!hlist_unhashed(&server->addr4_link)) 404 hlist_del_rcu(&server->addr4_link); 405 if (!hlist_unhashed(&server->addr6_link)) 406 hlist_del_rcu(&server->addr6_link); 407 write_sequnlock(&net->fs_addr_lock); 408 afs_destroy_server(net, server); 409 } 410 } 411 } 412 413 /* 414 * Manage the records of servers known to be within a network namespace. This 415 * includes garbage collecting unused servers. 416 * 417 * Note also that we were given an increment on net->servers_outstanding by 418 * whoever queued us that we need to deal with before returning. 419 */ 420 void afs_manage_servers(struct work_struct *work) 421 { 422 struct afs_net *net = container_of(work, struct afs_net, fs_manager); 423 struct afs_server *gc_list = NULL; 424 struct rb_node *cursor; 425 time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; 426 bool purging = !net->live; 427 428 _enter(""); 429 430 /* Trawl the server list looking for servers that have expired from 431 * lack of use. 432 */ 433 read_seqlock_excl(&net->fs_lock); 434 435 for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { 436 struct afs_server *server = 437 rb_entry(cursor, struct afs_server, uuid_rb); 438 int usage = atomic_read(&server->usage); 439 440 _debug("manage %pU %u", &server->uuid, usage); 441 442 ASSERTCMP(usage, >=, 1); 443 ASSERTIFCMP(purging, usage, ==, 1); 444 445 if (usage == 1) { 446 time64_t expire_at = server->put_time; 447 448 if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && 449 !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) 450 expire_at += afs_server_gc_delay; 451 if (purging || expire_at <= now) { 452 server->gc_next = gc_list; 453 gc_list = server; 454 } else if (expire_at < next_manage) { 455 next_manage = expire_at; 456 } 457 } 458 } 459 460 read_sequnlock_excl(&net->fs_lock); 461 462 /* Update the timer on the way out. We have to pass an increment on 463 * servers_outstanding in the namespace that we are in to the timer or 464 * the work scheduler. 465 */ 466 if (!purging && next_manage < TIME64_MAX) { 467 now = ktime_get_real_seconds(); 468 469 if (next_manage - now <= 0) { 470 if (queue_work(afs_wq, &net->fs_manager)) 471 afs_inc_servers_outstanding(net); 472 } else { 473 afs_set_server_timer(net, next_manage - now); 474 } 475 } 476 477 afs_gc_servers(net, gc_list); 478 479 afs_dec_servers_outstanding(net); 480 _leave(" [%d]", atomic_read(&net->servers_outstanding)); 481 } 482 483 static void afs_queue_server_manager(struct afs_net *net) 484 { 485 afs_inc_servers_outstanding(net); 486 if (!queue_work(afs_wq, &net->fs_manager)) 487 afs_dec_servers_outstanding(net); 488 } 489 490 /* 491 * Purge list of servers. 492 */ 493 void afs_purge_servers(struct afs_net *net) 494 { 495 _enter(""); 496 497 if (del_timer_sync(&net->fs_timer)) 498 atomic_dec(&net->servers_outstanding); 499 500 afs_queue_server_manager(net); 501 502 _debug("wait"); 503 wait_var_event(&net->servers_outstanding, 504 !atomic_read(&net->servers_outstanding)); 505 _leave(""); 506 } 507 508 /* 509 * Get an update for a server's address list. 510 */ 511 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) 512 { 513 struct afs_addr_list *alist, *discard; 514 515 _enter(""); 516 517 alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, 518 &server->uuid); 519 if (IS_ERR(alist)) { 520 if ((PTR_ERR(alist) == -ERESTARTSYS || 521 PTR_ERR(alist) == -EINTR) && 522 !(fc->flags & AFS_FS_CURSOR_INTR) && 523 server->addresses) { 524 _leave(" = t [intr]"); 525 return true; 526 } 527 fc->error = PTR_ERR(alist); 528 _leave(" = f [%d]", fc->error); 529 return false; 530 } 531 532 discard = alist; 533 if (server->addr_version != alist->version) { 534 write_lock(&server->fs_lock); 535 discard = rcu_dereference_protected(server->addresses, 536 lockdep_is_held(&server->fs_lock)); 537 rcu_assign_pointer(server->addresses, alist); 538 server->addr_version = alist->version; 539 write_unlock(&server->fs_lock); 540 } 541 542 server->update_at = ktime_get_real_seconds() + afs_server_update_delay; 543 afs_put_addrlist(discard); 544 _leave(" = t"); 545 return true; 546 } 547 548 /* 549 * See if a server's address list needs updating. 550 */ 551 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) 552 { 553 time64_t now = ktime_get_real_seconds(); 554 long diff; 555 bool success; 556 int ret, retries = 0; 557 558 _enter(""); 559 560 ASSERT(server); 561 562 retry: 563 diff = READ_ONCE(server->update_at) - now; 564 if (diff > 0) { 565 _leave(" = t [not now %ld]", diff); 566 return true; 567 } 568 569 if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { 570 success = afs_update_server_record(fc, server); 571 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); 572 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); 573 _leave(" = %d", success); 574 return success; 575 } 576 577 ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, 578 TASK_INTERRUPTIBLE); 579 if (ret == -ERESTARTSYS) { 580 if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) { 581 _leave(" = t [intr]"); 582 return true; 583 } 584 fc->error = ret; 585 _leave(" = f [intr]"); 586 return false; 587 } 588 589 retries++; 590 if (retries == 4) { 591 _leave(" = f [stale]"); 592 ret = -ESTALE; 593 return false; 594 } 595 goto retry; 596 } 597