1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS volume management 3 * 4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/slab.h> 10 #include "internal.h" 11 12 unsigned __read_mostly afs_volume_gc_delay = 10; 13 unsigned __read_mostly afs_volume_record_life = 60 * 60; 14 15 /* 16 * Insert a volume into a cell. If there's an existing volume record, that is 17 * returned instead with a ref held. 18 */ 19 static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, 20 struct afs_volume *volume) 21 { 22 struct afs_volume *p; 23 struct rb_node *parent = NULL, **pp; 24 25 write_seqlock(&cell->volume_lock); 26 27 pp = &cell->volumes.rb_node; 28 while (*pp) { 29 parent = *pp; 30 p = rb_entry(parent, struct afs_volume, cell_node); 31 if (p->vid < volume->vid) { 32 pp = &(*pp)->rb_left; 33 } else if (p->vid > volume->vid) { 34 pp = &(*pp)->rb_right; 35 } else { 36 volume = afs_get_volume(p, afs_volume_trace_get_cell_insert); 37 goto found; 38 } 39 } 40 41 rb_link_node_rcu(&volume->cell_node, parent, pp); 42 rb_insert_color(&volume->cell_node, &cell->volumes); 43 hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); 44 45 found: 46 write_sequnlock(&cell->volume_lock); 47 return volume; 48 49 } 50 51 static void afs_remove_volume_from_cell(struct afs_volume *volume) 52 { 53 struct afs_cell *cell = volume->cell; 54 55 if (!hlist_unhashed(&volume->proc_link)) { 56 trace_afs_volume(volume->vid, atomic_read(&volume->usage), 57 afs_volume_trace_remove); 58 write_seqlock(&cell->volume_lock); 59 hlist_del_rcu(&volume->proc_link); 60 rb_erase(&volume->cell_node, &cell->volumes); 61 write_sequnlock(&cell->volume_lock); 62 } 63 } 64 65 /* 66 * Allocate a volume record and load it up from a vldb record. 67 */ 68 static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, 69 struct afs_vldb_entry *vldb, 70 unsigned long type_mask) 71 { 72 struct afs_server_list *slist; 73 struct afs_volume *volume; 74 int ret = -ENOMEM, nr_servers = 0, i; 75 76 for (i = 0; i < vldb->nr_servers; i++) 77 if (vldb->fs_mask[i] & type_mask) 78 nr_servers++; 79 80 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 81 if (!volume) 82 goto error_0; 83 84 volume->vid = vldb->vid[params->type]; 85 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 86 volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); 87 volume->type = params->type; 88 volume->type_force = params->force; 89 volume->name_len = vldb->name_len; 90 91 atomic_set(&volume->usage, 1); 92 INIT_HLIST_NODE(&volume->proc_link); 93 rwlock_init(&volume->servers_lock); 94 rwlock_init(&volume->cb_v_break_lock); 95 memcpy(volume->name, vldb->name, vldb->name_len + 1); 96 97 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); 98 if (IS_ERR(slist)) { 99 ret = PTR_ERR(slist); 100 goto error_1; 101 } 102 103 refcount_set(&slist->usage, 1); 104 rcu_assign_pointer(volume->servers, slist); 105 trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); 106 return volume; 107 108 error_1: 109 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 110 kfree(volume); 111 error_0: 112 return ERR_PTR(ret); 113 } 114 115 /* 116 * Look up or allocate a volume record. 117 */ 118 static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, 119 struct afs_vldb_entry *vldb, 120 unsigned long type_mask) 121 { 122 struct afs_volume *candidate, *volume; 123 124 candidate = afs_alloc_volume(params, vldb, type_mask); 125 if (IS_ERR(candidate)) 126 return candidate; 127 128 volume = afs_insert_volume_into_cell(params->cell, candidate); 129 if (volume != candidate) 130 afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup); 131 return volume; 132 } 133 134 /* 135 * Look up a VLDB record for a volume. 136 */ 137 static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, 138 struct key *key, 139 const char *volname, 140 size_t volnamesz) 141 { 142 struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); 143 struct afs_vl_cursor vc; 144 int ret; 145 146 if (!afs_begin_vlserver_operation(&vc, cell, key)) 147 return ERR_PTR(-ERESTARTSYS); 148 149 while (afs_select_vlserver(&vc)) { 150 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); 151 } 152 153 ret = afs_end_vlserver_operation(&vc); 154 return ret < 0 ? ERR_PTR(ret) : vldb; 155 } 156 157 /* 158 * Look up a volume in the VL server and create a candidate volume record for 159 * it. 160 * 161 * The volume name can be one of the following: 162 * "%[cell:]volume[.]" R/W volume 163 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 164 * or R/W (rwparent=1) volume 165 * "%[cell:]volume.readonly" R/O volume 166 * "#[cell:]volume.readonly" R/O volume 167 * "%[cell:]volume.backup" Backup volume 168 * "#[cell:]volume.backup" Backup volume 169 * 170 * The cell name is optional, and defaults to the current cell. 171 * 172 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin 173 * Guide 174 * - Rule 1: Explicit type suffix forces access of that type or nothing 175 * (no suffix, then use Rule 2 & 3) 176 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W 177 * if not available 178 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 179 * explicitly told otherwise 180 */ 181 struct afs_volume *afs_create_volume(struct afs_fs_context *params) 182 { 183 struct afs_vldb_entry *vldb; 184 struct afs_volume *volume; 185 unsigned long type_mask = 1UL << params->type; 186 187 vldb = afs_vl_lookup_vldb(params->cell, params->key, 188 params->volname, params->volnamesz); 189 if (IS_ERR(vldb)) 190 return ERR_CAST(vldb); 191 192 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { 193 volume = ERR_PTR(vldb->error); 194 goto error; 195 } 196 197 /* Make the final decision on the type we want */ 198 volume = ERR_PTR(-ENOMEDIUM); 199 if (params->force) { 200 if (!(vldb->flags & type_mask)) 201 goto error; 202 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { 203 params->type = AFSVL_ROVOL; 204 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { 205 params->type = AFSVL_RWVOL; 206 } else { 207 goto error; 208 } 209 210 type_mask = 1UL << params->type; 211 volume = afs_lookup_volume(params, vldb, type_mask); 212 213 error: 214 kfree(vldb); 215 return volume; 216 } 217 218 /* 219 * Destroy a volume record 220 */ 221 static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) 222 { 223 _enter("%p", volume); 224 225 #ifdef CONFIG_AFS_FSCACHE 226 ASSERTCMP(volume->cache, ==, NULL); 227 #endif 228 229 afs_remove_volume_from_cell(volume); 230 afs_put_serverlist(net, rcu_access_pointer(volume->servers)); 231 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 232 trace_afs_volume(volume->vid, atomic_read(&volume->usage), 233 afs_volume_trace_free); 234 kfree_rcu(volume, rcu); 235 236 _leave(" [destroyed]"); 237 } 238 239 /* 240 * Get a reference on a volume record. 241 */ 242 struct afs_volume *afs_get_volume(struct afs_volume *volume, 243 enum afs_volume_trace reason) 244 { 245 if (volume) { 246 int u = atomic_inc_return(&volume->usage); 247 trace_afs_volume(volume->vid, u, reason); 248 } 249 return volume; 250 } 251 252 253 /* 254 * Drop a reference on a volume record. 255 */ 256 void afs_put_volume(struct afs_net *net, struct afs_volume *volume, 257 enum afs_volume_trace reason) 258 { 259 if (volume) { 260 afs_volid_t vid = volume->vid; 261 int u = atomic_dec_return(&volume->usage); 262 trace_afs_volume(vid, u, reason); 263 if (u == 0) 264 afs_destroy_volume(net, volume); 265 } 266 } 267 268 /* 269 * Activate a volume. 270 */ 271 void afs_activate_volume(struct afs_volume *volume) 272 { 273 #ifdef CONFIG_AFS_FSCACHE 274 volume->cache = fscache_acquire_cookie(volume->cell->cache, 275 &afs_volume_cache_index_def, 276 &volume->vid, sizeof(volume->vid), 277 NULL, 0, 278 volume, 0, true); 279 #endif 280 } 281 282 /* 283 * Deactivate a volume. 284 */ 285 void afs_deactivate_volume(struct afs_volume *volume) 286 { 287 _enter("%s", volume->name); 288 289 #ifdef CONFIG_AFS_FSCACHE 290 fscache_relinquish_cookie(volume->cache, NULL, 291 test_bit(AFS_VOLUME_DELETED, &volume->flags)); 292 volume->cache = NULL; 293 #endif 294 295 _leave(""); 296 } 297 298 /* 299 * Query the VL service to update the volume status. 300 */ 301 static int afs_update_volume_status(struct afs_volume *volume, struct key *key) 302 { 303 struct afs_server_list *new, *old, *discard; 304 struct afs_vldb_entry *vldb; 305 char idbuf[16]; 306 int ret, idsz; 307 308 _enter(""); 309 310 /* We look up an ID by passing it as a decimal string in the 311 * operation's name parameter. 312 */ 313 idsz = sprintf(idbuf, "%llu", volume->vid); 314 315 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); 316 if (IS_ERR(vldb)) { 317 ret = PTR_ERR(vldb); 318 goto error; 319 } 320 321 /* See if the volume got renamed. */ 322 if (vldb->name_len != volume->name_len || 323 memcmp(vldb->name, volume->name, vldb->name_len) != 0) { 324 /* TODO: Use RCU'd string. */ 325 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); 326 volume->name_len = vldb->name_len; 327 } 328 329 /* See if the volume's server list got updated. */ 330 new = afs_alloc_server_list(volume->cell, key, 331 vldb, (1 << volume->type)); 332 if (IS_ERR(new)) { 333 ret = PTR_ERR(new); 334 goto error_vldb; 335 } 336 337 write_lock(&volume->servers_lock); 338 339 discard = new; 340 old = rcu_dereference_protected(volume->servers, 341 lockdep_is_held(&volume->servers_lock)); 342 if (afs_annotate_server_list(new, old)) { 343 new->seq = volume->servers_seq + 1; 344 rcu_assign_pointer(volume->servers, new); 345 smp_wmb(); 346 volume->servers_seq++; 347 discard = old; 348 } 349 350 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 351 write_unlock(&volume->servers_lock); 352 ret = 0; 353 354 afs_put_serverlist(volume->cell->net, discard); 355 error_vldb: 356 kfree(vldb); 357 error: 358 _leave(" = %d", ret); 359 return ret; 360 } 361 362 /* 363 * Make sure the volume record is up to date. 364 */ 365 int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) 366 { 367 int ret, retries = 0; 368 369 _enter(""); 370 371 retry: 372 if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) 373 goto wait; 374 if (volume->update_at <= ktime_get_real_seconds() || 375 test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) 376 goto update; 377 _leave(" = 0"); 378 return 0; 379 380 update: 381 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { 382 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 383 ret = afs_update_volume_status(volume, op->key); 384 if (ret < 0) 385 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 386 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); 387 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); 388 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); 389 _leave(" = %d", ret); 390 return ret; 391 } 392 393 wait: 394 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { 395 _leave(" = 0 [no wait]"); 396 return 0; 397 } 398 399 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, 400 (op->flags & AFS_OPERATION_UNINTR) ? 401 TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); 402 if (ret == -ERESTARTSYS) { 403 _leave(" = %d", ret); 404 return ret; 405 } 406 407 retries++; 408 if (retries == 4) { 409 _leave(" = -ESTALE"); 410 return -ESTALE; 411 } 412 goto retry; 413 } 414