xref: /openbmc/linux/fs/afs/volume.c (revision 0f9b4c3ca5fdf3e177266ef994071b1a03f07318)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS volume management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include "internal.h"
11 
12 static unsigned __read_mostly afs_volume_record_life = 60 * 60;
13 
14 /*
15  * Insert a volume into a cell.  If there's an existing volume record, that is
16  * returned instead with a ref held.
17  */
afs_insert_volume_into_cell(struct afs_cell * cell,struct afs_volume * volume)18 static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
19 						      struct afs_volume *volume)
20 {
21 	struct afs_volume *p;
22 	struct rb_node *parent = NULL, **pp;
23 
24 	write_seqlock(&cell->volume_lock);
25 
26 	pp = &cell->volumes.rb_node;
27 	while (*pp) {
28 		parent = *pp;
29 		p = rb_entry(parent, struct afs_volume, cell_node);
30 		if (p->vid < volume->vid) {
31 			pp = &(*pp)->rb_left;
32 		} else if (p->vid > volume->vid) {
33 			pp = &(*pp)->rb_right;
34 		} else {
35 			if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
36 				volume = p;
37 				goto found;
38 			}
39 
40 			set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
41 			rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
42 		}
43 	}
44 
45 	rb_link_node_rcu(&volume->cell_node, parent, pp);
46 	rb_insert_color(&volume->cell_node, &cell->volumes);
47 	hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
48 
49 found:
50 	write_sequnlock(&cell->volume_lock);
51 	return volume;
52 
53 }
54 
afs_remove_volume_from_cell(struct afs_volume * volume)55 static void afs_remove_volume_from_cell(struct afs_volume *volume)
56 {
57 	struct afs_cell *cell = volume->cell;
58 
59 	if (!hlist_unhashed(&volume->proc_link)) {
60 		trace_afs_volume(volume->vid, refcount_read(&cell->ref),
61 				 afs_volume_trace_remove);
62 		write_seqlock(&cell->volume_lock);
63 		hlist_del_rcu(&volume->proc_link);
64 		if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
65 			rb_erase(&volume->cell_node, &cell->volumes);
66 		write_sequnlock(&cell->volume_lock);
67 	}
68 }
69 
70 /*
71  * Allocate a volume record and load it up from a vldb record.
72  */
afs_alloc_volume(struct afs_fs_context * params,struct afs_vldb_entry * vldb,struct afs_server_list ** _slist)73 static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
74 					   struct afs_vldb_entry *vldb,
75 					   struct afs_server_list **_slist)
76 {
77 	struct afs_server_list *slist;
78 	struct afs_volume *volume;
79 	int ret = -ENOMEM, i;
80 
81 	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
82 	if (!volume)
83 		goto error_0;
84 
85 	volume->vid		= vldb->vid[params->type];
86 	volume->update_at	= ktime_get_real_seconds() + afs_volume_record_life;
87 	volume->cell		= afs_get_cell(params->cell, afs_cell_trace_get_vol);
88 	volume->type		= params->type;
89 	volume->type_force	= params->force;
90 	volume->name_len	= vldb->name_len;
91 
92 	refcount_set(&volume->ref, 1);
93 	INIT_HLIST_NODE(&volume->proc_link);
94 	rwlock_init(&volume->servers_lock);
95 	rwlock_init(&volume->cb_v_break_lock);
96 	memcpy(volume->name, vldb->name, vldb->name_len + 1);
97 
98 	for (i = 0; i < AFS_MAXTYPES; i++)
99 		volume->vids[i] = vldb->vid[i];
100 
101 	slist = afs_alloc_server_list(volume, params->key, vldb);
102 	if (IS_ERR(slist)) {
103 		ret = PTR_ERR(slist);
104 		goto error_1;
105 	}
106 
107 	*_slist = slist;
108 	rcu_assign_pointer(volume->servers, slist);
109 	trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
110 	return volume;
111 
112 error_1:
113 	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
114 	kfree(volume);
115 error_0:
116 	return ERR_PTR(ret);
117 }
118 
119 /*
120  * Look up or allocate a volume record.
121  */
afs_lookup_volume(struct afs_fs_context * params,struct afs_vldb_entry * vldb)122 static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
123 					    struct afs_vldb_entry *vldb)
124 {
125 	struct afs_server_list *slist;
126 	struct afs_volume *candidate, *volume;
127 
128 	candidate = afs_alloc_volume(params, vldb, &slist);
129 	if (IS_ERR(candidate))
130 		return candidate;
131 
132 	volume = afs_insert_volume_into_cell(params->cell, candidate);
133 	if (volume == candidate)
134 		afs_attach_volume_to_servers(volume, slist);
135 	else
136 		afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup);
137 	return volume;
138 }
139 
140 /*
141  * Look up a VLDB record for a volume.
142  */
afs_vl_lookup_vldb(struct afs_cell * cell,struct key * key,const char * volname,size_t volnamesz)143 static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
144 						 struct key *key,
145 						 const char *volname,
146 						 size_t volnamesz)
147 {
148 	struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
149 	struct afs_vl_cursor vc;
150 	int ret;
151 
152 	if (!afs_begin_vlserver_operation(&vc, cell, key))
153 		return ERR_PTR(-ERESTARTSYS);
154 
155 	while (afs_select_vlserver(&vc)) {
156 		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
157 	}
158 
159 	ret = afs_end_vlserver_operation(&vc);
160 	return ret < 0 ? ERR_PTR(ret) : vldb;
161 }
162 
163 /*
164  * Look up a volume in the VL server and create a candidate volume record for
165  * it.
166  *
167  * The volume name can be one of the following:
168  *	"%[cell:]volume[.]"		R/W volume
169  *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
170  *					 or R/W (rwparent=1) volume
171  *	"%[cell:]volume.readonly"	R/O volume
172  *	"#[cell:]volume.readonly"	R/O volume
173  *	"%[cell:]volume.backup"		Backup volume
174  *	"#[cell:]volume.backup"		Backup volume
175  *
176  * The cell name is optional, and defaults to the current cell.
177  *
178  * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
179  * Guide
180  * - Rule 1: Explicit type suffix forces access of that type or nothing
181  *           (no suffix, then use Rule 2 & 3)
182  * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
183  *           if not available
184  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
185  *           explicitly told otherwise
186  */
afs_create_volume(struct afs_fs_context * params)187 struct afs_volume *afs_create_volume(struct afs_fs_context *params)
188 {
189 	struct afs_vldb_entry *vldb;
190 	struct afs_volume *volume;
191 	unsigned long type_mask = 1UL << params->type;
192 
193 	vldb = afs_vl_lookup_vldb(params->cell, params->key,
194 				  params->volname, params->volnamesz);
195 	if (IS_ERR(vldb))
196 		return ERR_CAST(vldb);
197 
198 	if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
199 		volume = ERR_PTR(vldb->error);
200 		goto error;
201 	}
202 
203 	/* Make the final decision on the type we want */
204 	volume = ERR_PTR(-ENOMEDIUM);
205 	if (params->force) {
206 		if (!(vldb->flags & type_mask))
207 			goto error;
208 	} else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
209 		params->type = AFSVL_ROVOL;
210 	} else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
211 		params->type = AFSVL_RWVOL;
212 	} else {
213 		goto error;
214 	}
215 
216 	volume = afs_lookup_volume(params, vldb);
217 
218 error:
219 	kfree(vldb);
220 	return volume;
221 }
222 
223 /*
224  * Destroy a volume record
225  */
afs_destroy_volume(struct afs_net * net,struct afs_volume * volume)226 static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
227 {
228 	struct afs_server_list *slist = rcu_access_pointer(volume->servers);
229 
230 	_enter("%p", volume);
231 
232 #ifdef CONFIG_AFS_FSCACHE
233 	ASSERTCMP(volume->cache, ==, NULL);
234 #endif
235 
236 	afs_detach_volume_from_servers(volume, slist);
237 	afs_remove_volume_from_cell(volume);
238 	afs_put_serverlist(net, slist);
239 	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
240 	trace_afs_volume(volume->vid, refcount_read(&volume->ref),
241 			 afs_volume_trace_free);
242 	kfree_rcu(volume, rcu);
243 
244 	_leave(" [destroyed]");
245 }
246 
247 /*
248  * Try to get a reference on a volume record.
249  */
afs_try_get_volume(struct afs_volume * volume,enum afs_volume_trace reason)250 bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
251 {
252 	int r;
253 
254 	if (__refcount_inc_not_zero(&volume->ref, &r)) {
255 		trace_afs_volume(volume->vid, r + 1, reason);
256 		return true;
257 	}
258 	return false;
259 }
260 
261 /*
262  * Get a reference on a volume record.
263  */
afs_get_volume(struct afs_volume * volume,enum afs_volume_trace reason)264 struct afs_volume *afs_get_volume(struct afs_volume *volume,
265 				  enum afs_volume_trace reason)
266 {
267 	if (volume) {
268 		int r;
269 
270 		__refcount_inc(&volume->ref, &r);
271 		trace_afs_volume(volume->vid, r + 1, reason);
272 	}
273 	return volume;
274 }
275 
276 
277 /*
278  * Drop a reference on a volume record.
279  */
afs_put_volume(struct afs_net * net,struct afs_volume * volume,enum afs_volume_trace reason)280 void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
281 		    enum afs_volume_trace reason)
282 {
283 	if (volume) {
284 		afs_volid_t vid = volume->vid;
285 		bool zero;
286 		int r;
287 
288 		zero = __refcount_dec_and_test(&volume->ref, &r);
289 		trace_afs_volume(vid, r - 1, reason);
290 		if (zero)
291 			afs_destroy_volume(net, volume);
292 	}
293 }
294 
/*
 * Activate a volume.
 *
 * With CONFIG_AFS_FSCACHE, an fscache volume cookie keyed on
 * "afs,<cell name>,<volume ID in hex>" is acquired and stored in
 * volume->cache.  If the key is already in use (-EBUSY), an error is logged
 * and the volume carries on without a cache.  Without CONFIG_AFS_FSCACHE this
 * does nothing.
 *
 * Returns 0 on success, -ENOMEM if the key cannot be allocated, or any other
 * error reported by fscache_acquire_volume().
 */
int afs_activate_volume(struct afs_volume *volume)
{
	int ret = 0;
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *cookie;
	char *key;

	key = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			volume->cell->name, volume->vid);
	if (!key)
		return -ENOMEM;

	cookie = fscache_acquire_volume(key, NULL, NULL, 0);
	if (!IS_ERR(cookie)) {
		volume->cache = cookie;
	} else if (cookie == ERR_PTR(-EBUSY)) {
		/* Not fatal: run uncached rather than failing activation. */
		pr_err("AFS: Cache volume key already in use (%s)\n", key);
		volume->cache = NULL;
	} else {
		ret = PTR_ERR(cookie);
	}

	kfree(key);
#endif
	return ret;
}
323 
324 /*
325  * Deactivate a volume.
326  */
afs_deactivate_volume(struct afs_volume * volume)327 void afs_deactivate_volume(struct afs_volume *volume)
328 {
329 	_enter("%s", volume->name);
330 
331 #ifdef CONFIG_AFS_FSCACHE
332 	fscache_relinquish_volume(volume->cache, NULL,
333 				  test_bit(AFS_VOLUME_DELETED, &volume->flags));
334 	volume->cache = NULL;
335 #endif
336 
337 	_leave("");
338 }
339 
340 /*
341  * Query the VL service to update the volume status.
342  */
afs_update_volume_status(struct afs_volume * volume,struct key * key)343 static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
344 {
345 	struct afs_server_list *new, *old, *discard;
346 	struct afs_vldb_entry *vldb;
347 	char idbuf[24];
348 	int ret, idsz;
349 
350 	_enter("");
351 
352 	/* We look up an ID by passing it as a decimal string in the
353 	 * operation's name parameter.
354 	 */
355 	idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
356 
357 	vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
358 	if (IS_ERR(vldb)) {
359 		ret = PTR_ERR(vldb);
360 		goto error;
361 	}
362 
363 	/* See if the volume got renamed. */
364 	if (vldb->name_len != volume->name_len ||
365 	    memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
366 		/* TODO: Use RCU'd string. */
367 		memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
368 		volume->name_len = vldb->name_len;
369 	}
370 
371 	/* See if the volume's server list got updated. */
372 	new = afs_alloc_server_list(volume, key, vldb);
373 	if (IS_ERR(new)) {
374 		ret = PTR_ERR(new);
375 		goto error_vldb;
376 	}
377 
378 	write_lock(&volume->servers_lock);
379 
380 	discard = new;
381 	old = rcu_dereference_protected(volume->servers,
382 					lockdep_is_held(&volume->servers_lock));
383 	if (afs_annotate_server_list(new, old)) {
384 		new->seq = volume->servers_seq + 1;
385 		rcu_assign_pointer(volume->servers, new);
386 		smp_wmb();
387 		volume->servers_seq++;
388 		discard = old;
389 	}
390 
391 	volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
392 	write_unlock(&volume->servers_lock);
393 
394 	if (discard == old)
395 		afs_reattach_volume_to_servers(volume, new, old);
396 	afs_put_serverlist(volume->cell->net, discard);
397 	ret = 0;
398 error_vldb:
399 	kfree(vldb);
400 error:
401 	_leave(" = %d", ret);
402 	return ret;
403 }
404 
405 /*
406  * Make sure the volume record is up to date.
407  */
afs_check_volume_status(struct afs_volume * volume,struct afs_operation * op)408 int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
409 {
410 	int ret, retries = 0;
411 
412 	_enter("");
413 
414 retry:
415 	if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
416 		goto wait;
417 	if (volume->update_at <= ktime_get_real_seconds() ||
418 	    test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
419 		goto update;
420 	_leave(" = 0");
421 	return 0;
422 
423 update:
424 	if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
425 		clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
426 		ret = afs_update_volume_status(volume, op->key);
427 		if (ret < 0)
428 			set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
429 		clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
430 		clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
431 		wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
432 		_leave(" = %d", ret);
433 		return ret;
434 	}
435 
436 wait:
437 	if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
438 		_leave(" = 0 [no wait]");
439 		return 0;
440 	}
441 
442 	ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
443 			  (op->flags & AFS_OPERATION_UNINTR) ?
444 			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
445 	if (ret == -ERESTARTSYS) {
446 		_leave(" = %d", ret);
447 		return ret;
448 	}
449 
450 	retries++;
451 	if (retries == 4) {
452 		_leave(" = -ESTALE");
453 		return -ESTALE;
454 	}
455 	goto retry;
456 }
457