1 /* AFS volume management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/slab.h> 16 #include <linux/fs.h> 17 #include <linux/pagemap.h> 18 #include <linux/sched.h> 19 #include "internal.h" 20 21 static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; 22 23 /* 24 * lookup a volume by name 25 * - this can be one of the following: 26 * "%[cell:]volume[.]" R/W volume 27 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 28 * or R/W (rwparent=1) volume 29 * "%[cell:]volume.readonly" R/O volume 30 * "#[cell:]volume.readonly" R/O volume 31 * "%[cell:]volume.backup" Backup volume 32 * "#[cell:]volume.backup" Backup volume 33 * 34 * The cell name is optional, and defaults to the current cell. 35 * 36 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin 37 * Guide 38 * - Rule 1: Explicit type suffix forces access of that type or nothing 39 * (no suffix, then use Rule 2 & 3) 40 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W 41 * if not available 42 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 43 * explicitly told otherwise 44 */ 45 struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) 46 { 47 struct afs_vlocation *vlocation = NULL; 48 struct afs_volume *volume = NULL; 49 struct afs_server *server = NULL; 50 char srvtmask; 51 int ret, loop; 52 53 _enter("{%*.*s,%d}", 54 params->volnamesz, params->volnamesz, params->volname, params->rwpath); 55 56 /* lookup the volume location record */ 57 vlocation = afs_vlocation_lookup(params->cell, params->key, 58 params->volname, params->volnamesz); 59 if (IS_ERR(vlocation)) { 60 ret = PTR_ERR(vlocation); 61 vlocation = NULL; 62 goto error; 63 } 64 65 /* make the final decision on the type we want */ 66 ret = -ENOMEDIUM; 67 if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) 68 goto error; 69 70 srvtmask = 0; 71 for (loop = 0; loop < vlocation->vldb.nservers; loop++) 72 srvtmask |= vlocation->vldb.srvtmask[loop]; 73 74 if (params->force) { 75 if (!(srvtmask & (1 << params->type))) 76 goto error; 77 } else if (srvtmask & AFS_VOL_VTM_RO) { 78 params->type = AFSVL_ROVOL; 79 } else if (srvtmask & AFS_VOL_VTM_RW) { 80 params->type = AFSVL_RWVOL; 81 } else { 82 goto error; 83 } 84 85 down_write(¶ms->cell->vl_sem); 86 87 /* is the volume already active? */ 88 if (vlocation->vols[params->type]) { 89 /* yes - re-use it */ 90 volume = vlocation->vols[params->type]; 91 afs_get_volume(volume); 92 goto success; 93 } 94 95 /* create a new volume record */ 96 _debug("creating new volume record"); 97 98 ret = -ENOMEM; 99 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 100 if (!volume) 101 goto error_up; 102 103 atomic_set(&volume->usage, 1); 104 volume->type = params->type; 105 volume->type_force = params->force; 106 volume->cell = params->cell; 107 volume->vid = vlocation->vldb.vid[params->type]; 108 109 volume->bdi.ra_pages = VM_MAX_READAHEAD*1024/PAGE_SIZE; 110 ret = bdi_setup_and_register(&volume->bdi, "afs"); 111 if (ret) 112 goto error_bdi; 113 114 init_rwsem(&volume->server_sem); 115 116 /* look up all the applicable server records */ 117 for (loop = 0; loop < 8; loop++) { 118 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { 119 server = afs_lookup_server( 120 volume->cell, &vlocation->vldb.servers[loop]); 121 if (IS_ERR(server)) { 122 ret = PTR_ERR(server); 123 goto error_discard; 124 } 125 126 volume->servers[volume->nservers] = server; 127 volume->nservers++; 128 } 129 } 130 131 /* attach the cache and volume location */ 132 #ifdef CONFIG_AFS_FSCACHE 133 volume->cache = fscache_acquire_cookie(vlocation->cache, 134 &afs_volume_cache_index_def, 135 volume, true); 136 #endif 137 afs_get_vlocation(vlocation); 138 volume->vlocation = vlocation; 139 140 vlocation->vols[volume->type] = volume; 141 142 success: 143 _debug("kAFS selected %s volume %08x", 144 afs_voltypes[volume->type], volume->vid); 145 up_write(¶ms->cell->vl_sem); 146 afs_put_vlocation(vlocation); 147 _leave(" = %p", volume); 148 return volume; 149 150 /* clean up */ 151 error_up: 152 up_write(¶ms->cell->vl_sem); 153 error: 154 afs_put_vlocation(vlocation); 155 _leave(" = %d", ret); 156 return ERR_PTR(ret); 157 158 error_discard: 159 bdi_destroy(&volume->bdi); 160 error_bdi: 161 up_write(¶ms->cell->vl_sem); 162 163 for (loop = volume->nservers - 1; loop >= 0; loop--) 164 afs_put_server(volume->servers[loop]); 165 166 kfree(volume); 167 goto error; 168 } 169 170 /* 171 * destroy a volume record 172 */ 173 void afs_put_volume(struct afs_volume *volume) 174 { 175 struct afs_vlocation *vlocation; 176 int loop; 177 178 if (!volume) 179 return; 180 181 _enter("%p", volume); 182 183 ASSERTCMP(atomic_read(&volume->usage), >, 0); 184 185 vlocation = volume->vlocation; 186 187 /* to prevent a race, the decrement and the dequeue must be effectively 188 * atomic */ 189 down_write(&vlocation->cell->vl_sem); 190 191 if (likely(!atomic_dec_and_test(&volume->usage))) { 192 up_write(&vlocation->cell->vl_sem); 193 _leave(""); 194 return; 195 } 196 197 vlocation->vols[volume->type] = NULL; 198 199 up_write(&vlocation->cell->vl_sem); 200 201 /* finish cleaning up the volume */ 202 #ifdef CONFIG_AFS_FSCACHE 203 fscache_relinquish_cookie(volume->cache, 0); 204 #endif 205 afs_put_vlocation(vlocation); 206 207 for (loop = volume->nservers - 1; loop >= 0; loop--) 208 afs_put_server(volume->servers[loop]); 209 210 bdi_destroy(&volume->bdi); 211 kfree(volume); 212 213 _leave(" [destroyed]"); 214 } 215 216 /* 217 * pick a server to use to try accessing this volume 218 * - returns with an elevated usage count on the server chosen 219 */ 220 struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) 221 { 222 struct afs_volume *volume = vnode->volume; 223 struct afs_server *server; 224 int ret, state, loop; 225 226 _enter("%s", volume->vlocation->vldb.name); 227 228 /* stick with the server we're already using if we can */ 229 if (vnode->server && vnode->server->fs_state == 0) { 230 afs_get_server(vnode->server); 231 _leave(" = %p [current]", vnode->server); 232 return vnode->server; 233 } 234 235 down_read(&volume->server_sem); 236 237 /* handle the no-server case */ 238 if (volume->nservers == 0) { 239 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; 240 up_read(&volume->server_sem); 241 _leave(" = %d [no servers]", ret); 242 return ERR_PTR(ret); 243 } 244 245 /* basically, just search the list for the first live server and use 246 * that */ 247 ret = 0; 248 for (loop = 0; loop < volume->nservers; loop++) { 249 server = volume->servers[loop]; 250 state = server->fs_state; 251 252 _debug("consider %d [%d]", loop, state); 253 254 switch (state) { 255 /* found an apparently healthy server */ 256 case 0: 257 afs_get_server(server); 258 up_read(&volume->server_sem); 259 _leave(" = %p (picked %08x)", 260 server, ntohl(server->addr.s_addr)); 261 return server; 262 263 case -ENETUNREACH: 264 if (ret == 0) 265 ret = state; 266 break; 267 268 case -EHOSTUNREACH: 269 if (ret == 0 || 270 ret == -ENETUNREACH) 271 ret = state; 272 break; 273 274 case -ECONNREFUSED: 275 if (ret == 0 || 276 ret == -ENETUNREACH || 277 ret == -EHOSTUNREACH) 278 ret = state; 279 break; 280 281 default: 282 case -EREMOTEIO: 283 if (ret == 0 || 284 ret == -ENETUNREACH || 285 ret == -EHOSTUNREACH || 286 ret == -ECONNREFUSED) 287 ret = state; 288 break; 289 } 290 } 291 292 /* no available servers 293 * - TODO: handle the no active servers case better 294 */ 295 up_read(&volume->server_sem); 296 _leave(" = %d", ret); 297 return ERR_PTR(ret); 298 } 299 300 /* 301 * release a server after use 302 * - releases the ref on the server struct that was acquired by picking 303 * - records result of using a particular server to access a volume 304 * - return 0 to try again, 1 if okay or to issue error 305 * - the caller must release the server struct if result was 0 306 */ 307 int afs_volume_release_fileserver(struct afs_vnode *vnode, 308 struct afs_server *server, 309 int result) 310 { 311 struct afs_volume *volume = vnode->volume; 312 unsigned loop; 313 314 _enter("%s,%08x,%d", 315 volume->vlocation->vldb.name, ntohl(server->addr.s_addr), 316 result); 317 318 switch (result) { 319 /* success */ 320 case 0: 321 server->fs_act_jif = jiffies; 322 server->fs_state = 0; 323 _leave(""); 324 return 1; 325 326 /* the fileserver denied all knowledge of the volume */ 327 case -ENOMEDIUM: 328 server->fs_act_jif = jiffies; 329 down_write(&volume->server_sem); 330 331 /* firstly, find where the server is in the active list (if it 332 * is) */ 333 for (loop = 0; loop < volume->nservers; loop++) 334 if (volume->servers[loop] == server) 335 goto present; 336 337 /* no longer there - may have been discarded by another op */ 338 goto try_next_server_upw; 339 340 present: 341 volume->nservers--; 342 memmove(&volume->servers[loop], 343 &volume->servers[loop + 1], 344 sizeof(volume->servers[loop]) * 345 (volume->nservers - loop)); 346 volume->servers[volume->nservers] = NULL; 347 afs_put_server(server); 348 volume->rjservers++; 349 350 if (volume->nservers > 0) 351 /* another server might acknowledge its existence */ 352 goto try_next_server_upw; 353 354 /* handle the case where all the fileservers have rejected the 355 * volume 356 * - TODO: try asking the fileservers for volume information 357 * - TODO: contact the VL server again to see if the volume is 358 * no longer registered 359 */ 360 up_write(&volume->server_sem); 361 afs_put_server(server); 362 _leave(" [completely rejected]"); 363 return 1; 364 365 /* problem reaching the server */ 366 case -ENETUNREACH: 367 case -EHOSTUNREACH: 368 case -ECONNREFUSED: 369 case -ETIME: 370 case -ETIMEDOUT: 371 case -EREMOTEIO: 372 /* mark the server as dead 373 * TODO: vary dead timeout depending on error 374 */ 375 spin_lock(&server->fs_lock); 376 if (!server->fs_state) { 377 server->fs_dead_jif = jiffies + HZ * 10; 378 server->fs_state = result; 379 printk("kAFS: SERVER DEAD state=%d\n", result); 380 } 381 spin_unlock(&server->fs_lock); 382 goto try_next_server; 383 384 /* miscellaneous error */ 385 default: 386 server->fs_act_jif = jiffies; 387 case -ENOMEM: 388 case -ENONET: 389 /* tell the caller to accept the result */ 390 afs_put_server(server); 391 _leave(" [local failure]"); 392 return 1; 393 } 394 395 /* tell the caller to loop around and try the next server */ 396 try_next_server_upw: 397 up_write(&volume->server_sem); 398 try_next_server: 399 afs_put_server(server); 400 _leave(" [try next server]"); 401 return 0; 402 } 403