xref: /openbmc/linux/fs/xfs/xfs_refcount_item.c (revision 76426e23)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_bit.h"
12 #include "xfs_shared.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_trans.h"
16 #include "xfs_trans_priv.h"
17 #include "xfs_refcount_item.h"
18 #include "xfs_log.h"
19 #include "xfs_refcount.h"
20 #include "xfs_error.h"
21 
22 kmem_zone_t	*xfs_cui_zone;	/* cache xfs_cui_init() allocates from when nextents <= XFS_CUI_MAX_FAST_EXTENTS */
23 kmem_zone_t	*xfs_cud_zone;	/* cache xfs_trans_get_cud() allocates CUD items from */
24 
25 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
26 {
27 	return container_of(lip, struct xfs_cui_log_item, cui_item);
28 }
29 
30 void
31 xfs_cui_item_free(
32 	struct xfs_cui_log_item	*cuip)
33 {
34 	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
35 		kmem_free(cuip);
36 	else
37 		kmem_cache_free(xfs_cui_zone, cuip);
38 }
39 
40 /*
41  * Freeing the CUI requires that we remove it from the AIL if it has already
42  * been placed there. However, the CUI may not yet have been placed in the AIL
43  * when called by xfs_cui_release() from CUD processing due to the ordering of
44  * committed vs unpin operations in bulk insert operations. Hence the reference
45  * count to ensure only the last caller frees the CUI.
46  */
47 void
48 xfs_cui_release(
49 	struct xfs_cui_log_item	*cuip)
50 {
51 	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
52 	if (atomic_dec_and_test(&cuip->cui_refcount)) {
53 		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
54 		xfs_cui_item_free(cuip);
55 	}
56 }
57 
58 
59 STATIC void
60 xfs_cui_item_size(
61 	struct xfs_log_item	*lip,
62 	int			*nvecs,
63 	int			*nbytes)
64 {
65 	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
66 
67 	*nvecs += 1;
68 	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
69 }
70 
71 /*
72  * This is called to fill in the vector of log iovecs for the
73  * given cui log item. We use only 1 iovec, and we point that
74  * at the cui_log_format structure embedded in the cui item.
75  * It is at this point that we assert that all of the extent
76  * slots in the cui item have been filled.
77  */
78 STATIC void
79 xfs_cui_item_format(
80 	struct xfs_log_item	*lip,
81 	struct xfs_log_vec	*lv)
82 {
83 	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
84 	struct xfs_log_iovec	*vecp = NULL;
85 
86 	ASSERT(atomic_read(&cuip->cui_next_extent) ==
87 			cuip->cui_format.cui_nextents);
88 
89 	cuip->cui_format.cui_type = XFS_LI_CUI;
90 	cuip->cui_format.cui_size = 1;
91 
92 	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
93 			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
94 }
95 
96 /*
97  * The unpin operation is the last place an CUI is manipulated in the log. It is
98  * either inserted in the AIL or aborted in the event of a log I/O error. In
99  * either case, the CUI transaction has been successfully committed to make it
100  * this far. Therefore, we expect whoever committed the CUI to either construct
101  * and commit the CUD or drop the CUD's reference in the event of error. Simply
102  * drop the log's CUI reference now that the log is done with it.
103  */
104 STATIC void
105 xfs_cui_item_unpin(
106 	struct xfs_log_item	*lip,
107 	int			remove)
108 {
109 	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
110 
111 	xfs_cui_release(cuip);
112 }
113 
114 /*
115  * The CUI has been either committed or aborted if the transaction has been
116  * cancelled. If the transaction was cancelled, an CUD isn't going to be
117  * constructed and thus we free the CUI here directly.
118  */
119 STATIC void
120 xfs_cui_item_release(
121 	struct xfs_log_item	*lip)
122 {
123 	xfs_cui_release(CUI_ITEM(lip));
124 }
125 
/* Log item operations for CUI items; installed by xfs_cui_init(). */
126 static const struct xfs_item_ops xfs_cui_item_ops = {
127 	.iop_size	= xfs_cui_item_size,	/* one iovec sized for all extents */
128 	.iop_format	= xfs_cui_item_format,	/* copy cui_format into the log vector */
129 	.iop_unpin	= xfs_cui_item_unpin,	/* drop the log's CUI reference */
130 	.iop_release	= xfs_cui_item_release,	/* drop a CUI reference */
131 };
132 
133 /*
134  * Allocate and initialize an cui item with the given number of extents.
135  */
136 struct xfs_cui_log_item *
137 xfs_cui_init(
138 	struct xfs_mount		*mp,
139 	uint				nextents)
140 
141 {
142 	struct xfs_cui_log_item		*cuip;
143 
144 	ASSERT(nextents > 0);
145 	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
146 		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
147 				0);
148 	else
149 		cuip = kmem_zone_zalloc(xfs_cui_zone, 0);
150 
151 	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
152 	cuip->cui_format.cui_nextents = nextents;
153 	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
154 	atomic_set(&cuip->cui_next_extent, 0);
155 	atomic_set(&cuip->cui_refcount, 2);
156 
157 	return cuip;
158 }
159 
160 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
161 {
162 	return container_of(lip, struct xfs_cud_log_item, cud_item);
163 }
164 
165 STATIC void
166 xfs_cud_item_size(
167 	struct xfs_log_item	*lip,
168 	int			*nvecs,
169 	int			*nbytes)
170 {
171 	*nvecs += 1;
172 	*nbytes += sizeof(struct xfs_cud_log_format);
173 }
174 
175 /*
176  * This is called to fill in the vector of log iovecs for the
177  * given cud log item. We use only 1 iovec, and we point that
178  * at the cud_log_format structure embedded in the cud item.
179  * It is at this point that we assert that all of the extent
180  * slots in the cud item have been filled.
181  */
182 STATIC void
183 xfs_cud_item_format(
184 	struct xfs_log_item	*lip,
185 	struct xfs_log_vec	*lv)
186 {
187 	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
188 	struct xfs_log_iovec	*vecp = NULL;
189 
190 	cudp->cud_format.cud_type = XFS_LI_CUD;
191 	cudp->cud_format.cud_size = 1;
192 
193 	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
194 			sizeof(struct xfs_cud_log_format));
195 }
196 
197 /*
198  * The CUD is either committed or aborted if the transaction is cancelled. If
199  * the transaction is cancelled, drop our reference to the CUI and free the
200  * CUD.
201  */
202 STATIC void
203 xfs_cud_item_release(
204 	struct xfs_log_item	*lip)
205 {
206 	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
207 
208 	xfs_cui_release(cudp->cud_cuip);
209 	kmem_cache_free(xfs_cud_zone, cudp);
210 }
211 
/* Log item operations for CUD items; installed by xfs_trans_get_cud(). */
212 static const struct xfs_item_ops xfs_cud_item_ops = {
213 	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
214 	.iop_size	= xfs_cud_item_size,	/* one fixed-size iovec */
215 	.iop_format	= xfs_cud_item_format,	/* copy cud_format into the log vector */
216 	.iop_release	= xfs_cud_item_release,	/* drop the CUI reference, free the CUD */
217 };
218 
219 static struct xfs_cud_log_item *
220 xfs_trans_get_cud(
221 	struct xfs_trans		*tp,
222 	struct xfs_cui_log_item		*cuip)
223 {
224 	struct xfs_cud_log_item		*cudp;
225 
226 	cudp = kmem_zone_zalloc(xfs_cud_zone, 0);
227 	xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
228 			  &xfs_cud_item_ops);
229 	cudp->cud_cuip = cuip;
230 	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
231 
232 	xfs_trans_add_item(tp, &cudp->cud_item);
233 	return cudp;
234 }
235 
236 /*
237  * Finish an refcount update and log it to the CUD. Note that the
238  * transaction is marked dirty regardless of whether the refcount
239  * update succeeds or fails to support the CUI/CUD lifecycle rules.
240  */
241 static int
242 xfs_trans_log_finish_refcount_update(
243 	struct xfs_trans		*tp,
244 	struct xfs_cud_log_item		*cudp,
245 	enum xfs_refcount_intent_type	type,
246 	xfs_fsblock_t			startblock,
247 	xfs_extlen_t			blockcount,
248 	xfs_fsblock_t			*new_fsb,
249 	xfs_extlen_t			*new_len,
250 	struct xfs_btree_cur		**pcur)
251 {
252 	int				error;
253 
254 	error = xfs_refcount_finish_one(tp, type, startblock,
255 			blockcount, new_fsb, new_len, pcur);
256 
257 	/*
258 	 * Mark the transaction dirty, even on error. This ensures the
259 	 * transaction is aborted, which:
260 	 *
261 	 * 1.) releases the CUI and frees the CUD
262 	 * 2.) shuts down the filesystem
263 	 */
264 	tp->t_flags |= XFS_TRANS_DIRTY;
265 	set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
266 
267 	return error;
268 }
269 
270 /* Sort refcount intents by AG. */
271 static int
272 xfs_refcount_update_diff_items(
273 	void				*priv,
274 	struct list_head		*a,
275 	struct list_head		*b)
276 {
277 	struct xfs_mount		*mp = priv;
278 	struct xfs_refcount_intent	*ra;
279 	struct xfs_refcount_intent	*rb;
280 
281 	ra = container_of(a, struct xfs_refcount_intent, ri_list);
282 	rb = container_of(b, struct xfs_refcount_intent, ri_list);
283 	return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
284 		XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
285 }
286 
287 /* Get an CUI. */
288 STATIC void *
289 xfs_refcount_update_create_intent(
290 	struct xfs_trans		*tp,
291 	unsigned int			count)
292 {
293 	struct xfs_cui_log_item		*cuip;
294 
295 	ASSERT(tp != NULL);
296 	ASSERT(count > 0);
297 
298 	cuip = xfs_cui_init(tp->t_mountp, count);
299 	ASSERT(cuip != NULL);
300 
301 	/*
302 	 * Get a log_item_desc to point at the new item.
303 	 */
304 	xfs_trans_add_item(tp, &cuip->cui_item);
305 	return cuip;
306 }
307 
308 /* Set the phys extent flags for this reverse mapping. */
309 static void
310 xfs_trans_set_refcount_flags(
311 	struct xfs_phys_extent		*refc,
312 	enum xfs_refcount_intent_type	type)
313 {
314 	refc->pe_flags = 0;
315 	switch (type) {
316 	case XFS_REFCOUNT_INCREASE:
317 	case XFS_REFCOUNT_DECREASE:
318 	case XFS_REFCOUNT_ALLOC_COW:
319 	case XFS_REFCOUNT_FREE_COW:
320 		refc->pe_flags |= type;
321 		break;
322 	default:
323 		ASSERT(0);
324 	}
325 }
326 
327 /* Log refcount updates in the intent item. */
328 STATIC void
329 xfs_refcount_update_log_item(
330 	struct xfs_trans		*tp,
331 	void				*intent,
332 	struct list_head		*item)
333 {
334 	struct xfs_cui_log_item		*cuip = intent;
335 	struct xfs_refcount_intent	*refc;
336 	uint				next_extent;
337 	struct xfs_phys_extent		*ext;
338 
339 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
340 
341 	tp->t_flags |= XFS_TRANS_DIRTY;
342 	set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
343 
344 	/*
345 	 * atomic_inc_return gives us the value after the increment;
346 	 * we want to use it as an array index so we need to subtract 1 from
347 	 * it.
348 	 */
349 	next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
350 	ASSERT(next_extent < cuip->cui_format.cui_nextents);
351 	ext = &cuip->cui_format.cui_extents[next_extent];
352 	ext->pe_startblock = refc->ri_startblock;
353 	ext->pe_len = refc->ri_blockcount;
354 	xfs_trans_set_refcount_flags(ext, refc->ri_type);
355 }
356 
357 /* Get an CUD so we can process all the deferred refcount updates. */
358 STATIC void *
359 xfs_refcount_update_create_done(
360 	struct xfs_trans		*tp,
361 	void				*intent,
362 	unsigned int			count)
363 {
364 	return xfs_trans_get_cud(tp, intent);
365 }
366 
367 /* Process a deferred refcount update. */
368 STATIC int
369 xfs_refcount_update_finish_item(
370 	struct xfs_trans		*tp,
371 	struct list_head		*item,
372 	void				*done_item,
373 	void				**state)
374 {
375 	struct xfs_refcount_intent	*refc;
376 	xfs_fsblock_t			new_fsb;
377 	xfs_extlen_t			new_aglen;
378 	int				error;
379 
380 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
381 	error = xfs_trans_log_finish_refcount_update(tp, done_item,
382 			refc->ri_type,
383 			refc->ri_startblock,
384 			refc->ri_blockcount,
385 			&new_fsb, &new_aglen,
386 			(struct xfs_btree_cur **)state);
387 	/* Did we run out of reservation?  Requeue what we didn't finish. */
388 	if (!error && new_aglen > 0) {
389 		ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
390 		       refc->ri_type == XFS_REFCOUNT_DECREASE);
391 		refc->ri_startblock = new_fsb;
392 		refc->ri_blockcount = new_aglen;
393 		return -EAGAIN;
394 	}
395 	kmem_free(refc);
396 	return error;
397 }
398 
399 /* Clean up after processing deferred refcounts. */
400 STATIC void
401 xfs_refcount_update_finish_cleanup(
402 	struct xfs_trans	*tp,
403 	void			*state,
404 	int			error)
405 {
406 	struct xfs_btree_cur	*rcur = state;
407 
408 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
409 }
410 
411 /* Abort all pending CUIs. */
412 STATIC void
413 xfs_refcount_update_abort_intent(
414 	void				*intent)
415 {
416 	xfs_cui_release(intent);
417 }
418 
419 /* Cancel a deferred refcount update. */
420 STATIC void
421 xfs_refcount_update_cancel_item(
422 	struct list_head		*item)
423 {
424 	struct xfs_refcount_intent	*refc;
425 
426 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
427 	kmem_free(refc);
428 }
429 
/* Deferred-operation callbacks for refcount updates. */
430 const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
431 	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
432 	.diff_items	= xfs_refcount_update_diff_items,	/* sort intents by AG */
433 	.create_intent	= xfs_refcount_update_create_intent,	/* allocate a CUI and add it to the transaction */
434 	.abort_intent	= xfs_refcount_update_abort_intent,	/* drop a CUI reference */
435 	.log_item	= xfs_refcount_update_log_item,		/* fill the next CUI extent slot */
436 	.create_done	= xfs_refcount_update_create_done,	/* allocate the matching CUD */
437 	.finish_item	= xfs_refcount_update_finish_item,	/* perform one update, -EAGAIN if work remains */
438 	.finish_cleanup = xfs_refcount_update_finish_cleanup,	/* clean up the btree cursor state */
439 	.cancel_item	= xfs_refcount_update_cancel_item,	/* free an unprocessed intent */
440 };
441 
442 /*
443  * Process a refcount update intent item that was recovered from the log.
444  * We need to update the refcountbt.
445  */
446 int
447 xfs_cui_recover(
448 	struct xfs_trans		*parent_tp,
449 	struct xfs_cui_log_item		*cuip)
450 {
451 	int				i;
452 	int				error = 0;
453 	unsigned int			refc_type;
454 	struct xfs_phys_extent		*refc;
455 	xfs_fsblock_t			startblock_fsb;
456 	bool				op_ok;
457 	struct xfs_cud_log_item		*cudp;
458 	struct xfs_trans		*tp;
459 	struct xfs_btree_cur		*rcur = NULL;
460 	enum xfs_refcount_intent_type	type;
461 	xfs_fsblock_t			new_fsb;
462 	xfs_extlen_t			new_len;
463 	struct xfs_bmbt_irec		irec;
464 	bool				requeue_only = false;
465 	struct xfs_mount		*mp = parent_tp->t_mountp;
466 
467 	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
468 
469 	/*
470 	 * First check the validity of the extents described by the
471 	 * CUI.  If any are bad, then assume that all are bad and
472 	 * just toss the CUI.
473 	 */
474 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
475 		refc = &cuip->cui_format.cui_extents[i];
476 		startblock_fsb = XFS_BB_TO_FSB(mp,
477 				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
478 		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
479 		case XFS_REFCOUNT_INCREASE:
480 		case XFS_REFCOUNT_DECREASE:
481 		case XFS_REFCOUNT_ALLOC_COW:
482 		case XFS_REFCOUNT_FREE_COW:
483 			op_ok = true;
484 			break;
485 		default:
486 			op_ok = false;
487 			break;
488 		}
489 		if (!op_ok || startblock_fsb == 0 ||
490 		    refc->pe_len == 0 ||
491 		    startblock_fsb >= mp->m_sb.sb_dblocks ||
492 		    refc->pe_len >= mp->m_sb.sb_agblocks ||
493 		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
494 			/*
495 			 * This will pull the CUI from the AIL and
496 			 * free the memory associated with it.
497 			 */
498 			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
499 			xfs_cui_release(cuip);
500 			return -EFSCORRUPTED;
501 		}
502 	}
503 
504 	/*
505 	 * Under normal operation, refcount updates are deferred, so we
506 	 * wouldn't be adding them directly to a transaction.  All
507 	 * refcount updates manage reservation usage internally and
508 	 * dynamically by deferring work that won't fit in the
509 	 * transaction.  Normally, any work that needs to be deferred
510 	 * gets attached to the same defer_ops that scheduled the
511 	 * refcount update.  However, we're in log recovery here, so we
512 	 * we use the passed in defer_ops and to finish up any work that
513 	 * doesn't fit.  We need to reserve enough blocks to handle a
514 	 * full btree split on either end of the refcount range.
515 	 */
516 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
517 			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
518 	if (error)
519 		return error;
520 	/*
521 	 * Recovery stashes all deferred ops during intent processing and
522 	 * finishes them on completion. Transfer current dfops state to this
523 	 * transaction and transfer the result back before we return.
524 	 */
525 	xfs_defer_move(tp, parent_tp);
526 	cudp = xfs_trans_get_cud(tp, cuip);
527 
528 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
529 		refc = &cuip->cui_format.cui_extents[i];
530 		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
531 		switch (refc_type) {
532 		case XFS_REFCOUNT_INCREASE:
533 		case XFS_REFCOUNT_DECREASE:
534 		case XFS_REFCOUNT_ALLOC_COW:
535 		case XFS_REFCOUNT_FREE_COW:
536 			type = refc_type;
537 			break;
538 		default:
539 			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
540 			error = -EFSCORRUPTED;
541 			goto abort_error;
542 		}
543 		if (requeue_only) {
544 			new_fsb = refc->pe_startblock;
545 			new_len = refc->pe_len;
546 		} else
547 			error = xfs_trans_log_finish_refcount_update(tp, cudp,
548 				type, refc->pe_startblock, refc->pe_len,
549 				&new_fsb, &new_len, &rcur);
550 		if (error)
551 			goto abort_error;
552 
553 		/* Requeue what we didn't finish. */
554 		if (new_len > 0) {
555 			irec.br_startblock = new_fsb;
556 			irec.br_blockcount = new_len;
557 			switch (type) {
558 			case XFS_REFCOUNT_INCREASE:
559 				xfs_refcount_increase_extent(tp, &irec);
560 				break;
561 			case XFS_REFCOUNT_DECREASE:
562 				xfs_refcount_decrease_extent(tp, &irec);
563 				break;
564 			case XFS_REFCOUNT_ALLOC_COW:
565 				xfs_refcount_alloc_cow_extent(tp,
566 						irec.br_startblock,
567 						irec.br_blockcount);
568 				break;
569 			case XFS_REFCOUNT_FREE_COW:
570 				xfs_refcount_free_cow_extent(tp,
571 						irec.br_startblock,
572 						irec.br_blockcount);
573 				break;
574 			default:
575 				ASSERT(0);
576 			}
577 			requeue_only = true;
578 		}
579 	}
580 
581 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
582 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
583 	xfs_defer_move(parent_tp, tp);
584 	error = xfs_trans_commit(tp);
585 	return error;
586 
587 abort_error:
588 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
589 	xfs_defer_move(parent_tp, tp);
590 	xfs_trans_cancel(tp);
591 	return error;
592 }
593