1e06536a6SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
2e06536a6SDarrick J. Wong /*
3e06536a6SDarrick J. Wong  * Copyright (C) 2020 Oracle.  All Rights Reserved.
4e06536a6SDarrick J. Wong  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5e06536a6SDarrick J. Wong  */
6e06536a6SDarrick J. Wong #include "xfs.h"
7e06536a6SDarrick J. Wong #include "xfs_fs.h"
8e06536a6SDarrick J. Wong #include "xfs_shared.h"
9e06536a6SDarrick J. Wong #include "xfs_format.h"
10e06536a6SDarrick J. Wong #include "xfs_log_format.h"
11e06536a6SDarrick J. Wong #include "xfs_trans_resv.h"
12e06536a6SDarrick J. Wong #include "xfs_bit.h"
13e06536a6SDarrick J. Wong #include "xfs_mount.h"
14e06536a6SDarrick J. Wong #include "xfs_inode.h"
15e06536a6SDarrick J. Wong #include "xfs_trans.h"
16e06536a6SDarrick J. Wong #include "xfs_btree.h"
17e06536a6SDarrick J. Wong #include "xfs_trace.h"
18e06536a6SDarrick J. Wong #include "xfs_btree_staging.h"
19e06536a6SDarrick J. Wong 
20e06536a6SDarrick J. Wong /*
21e06536a6SDarrick J. Wong  * Staging Cursors and Fake Roots for Btrees
22e06536a6SDarrick J. Wong  * =========================================
23e06536a6SDarrick J. Wong  *
24e06536a6SDarrick J. Wong  * A staging btree cursor is a special type of btree cursor that callers must
25e06536a6SDarrick J. Wong  * use to construct a new btree index using the btree bulk loader code.  The
26e06536a6SDarrick J. Wong  * bulk loading code uses the staging btree cursor to abstract the details of
27e06536a6SDarrick J. Wong  * initializing new btree blocks and filling them with records or key/ptr
28e06536a6SDarrick J. Wong  * pairs.  Regular btree operations (e.g. queries and modifications) are not
29e06536a6SDarrick J. Wong  * supported with staging cursors, and callers must not invoke them.
30e06536a6SDarrick J. Wong  *
31e06536a6SDarrick J. Wong  * Fake root structures contain all the information about a btree that is under
32e06536a6SDarrick J. Wong  * construction by the bulk loading code.  Staging btree cursors point to fake
33e06536a6SDarrick J. Wong  * root structures instead of the usual AG header or inode structure.
34e06536a6SDarrick J. Wong  *
35e06536a6SDarrick J. Wong  * Callers are expected to initialize a fake root structure and pass it into
36e06536a6SDarrick J. Wong  * the _stage_cursor function for a specific btree type.  When bulk loading is
37e06536a6SDarrick J. Wong  * complete, callers should call the _commit_staged_btree function for that
38e06536a6SDarrick J. Wong  * specific btree type to commit the new btree into the filesystem.
39e06536a6SDarrick J. Wong  */
40e06536a6SDarrick J. Wong 
41e06536a6SDarrick J. Wong /*
42e06536a6SDarrick J. Wong  * Don't allow staging cursors to be duplicated because they're supposed to be
43e06536a6SDarrick J. Wong  * kept private to a single thread.
44e06536a6SDarrick J. Wong  */
STATIC struct xfs_btree_cur *
xfs_btree_fakeroot_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	/* Staging cursors are single-threaded; duplicating one is a bug. */
	ASSERT(0);
	return NULL;
}
52e06536a6SDarrick J. Wong 
53e06536a6SDarrick J. Wong /*
54e06536a6SDarrick J. Wong  * Don't allow block allocation for a staging cursor, because staging cursors
55e06536a6SDarrick J. Wong  * do not support regular btree modifications.
56e06536a6SDarrick J. Wong  *
57e06536a6SDarrick J. Wong  * Bulk loading uses a separate callback to obtain new blocks from a
58e06536a6SDarrick J. Wong  * preallocated list, which prevents ENOSPC failures during loading.
59e06536a6SDarrick J. Wong  */
STATIC int
xfs_btree_fakeroot_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start_bno,
	union xfs_btree_ptr		*new_bno,
	int				*stat)
{
	/*
	 * Regular btree block allocation must never happen under a fake
	 * root; bulk loading claims preallocated blocks via ->claim_block.
	 */
	ASSERT(0);
	return -EFSCORRUPTED;
}
70e06536a6SDarrick J. Wong 
71e06536a6SDarrick J. Wong /*
72e06536a6SDarrick J. Wong  * Don't allow block freeing for a staging cursor, because staging cursors
73e06536a6SDarrick J. Wong  * do not support regular btree modifications.
74e06536a6SDarrick J. Wong  */
STATIC int
xfs_btree_fakeroot_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	/* Staging cursors never free blocks; trap any attempt. */
	ASSERT(0);
	return -EFSCORRUPTED;
}
83e06536a6SDarrick J. Wong 
84e06536a6SDarrick J. Wong /* Initialize a pointer to the root block from the fakeroot. */
85e06536a6SDarrick J. Wong STATIC void
xfs_btree_fakeroot_init_ptr_from_cur(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)86e06536a6SDarrick J. Wong xfs_btree_fakeroot_init_ptr_from_cur(
87e06536a6SDarrick J. Wong 	struct xfs_btree_cur	*cur,
88e06536a6SDarrick J. Wong 	union xfs_btree_ptr	*ptr)
89e06536a6SDarrick J. Wong {
90e06536a6SDarrick J. Wong 	struct xbtree_afakeroot	*afake;
91e06536a6SDarrick J. Wong 
92e06536a6SDarrick J. Wong 	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
93e06536a6SDarrick J. Wong 
94e06536a6SDarrick J. Wong 	afake = cur->bc_ag.afake;
95e06536a6SDarrick J. Wong 	ptr->s = cpu_to_be32(afake->af_root);
96e06536a6SDarrick J. Wong }
97e06536a6SDarrick J. Wong 
98e06536a6SDarrick J. Wong /*
99e06536a6SDarrick J. Wong  * Bulk Loading for AG Btrees
100e06536a6SDarrick J. Wong  * ==========================
101e06536a6SDarrick J. Wong  *
102e06536a6SDarrick J. Wong  * For a btree rooted in an AG header, pass a xbtree_afakeroot structure to the
103e06536a6SDarrick J. Wong  * staging cursor.  Callers should initialize this to zero.
104e06536a6SDarrick J. Wong  *
105e06536a6SDarrick J. Wong  * The _stage_cursor() function for a specific btree type should call
106e06536a6SDarrick J. Wong  * xfs_btree_stage_afakeroot to set up the in-memory cursor as a staging
107e06536a6SDarrick J. Wong  * cursor.  The corresponding _commit_staged_btree() function should log the
108e06536a6SDarrick J. Wong  * new root and call xfs_btree_commit_afakeroot() to transform the staging
109e06536a6SDarrick J. Wong  * cursor into a regular btree cursor.
110e06536a6SDarrick J. Wong  */
111e06536a6SDarrick J. Wong 
112e06536a6SDarrick J. Wong /* Update the btree root information for a per-AG fake root. */
113e06536a6SDarrick J. Wong STATIC void
xfs_btree_afakeroot_set_root(struct xfs_btree_cur * cur,const union xfs_btree_ptr * ptr,int inc)114e06536a6SDarrick J. Wong xfs_btree_afakeroot_set_root(
115e06536a6SDarrick J. Wong 	struct xfs_btree_cur		*cur,
116b5a6e5feSDarrick J. Wong 	const union xfs_btree_ptr	*ptr,
117e06536a6SDarrick J. Wong 	int				inc)
118e06536a6SDarrick J. Wong {
119e06536a6SDarrick J. Wong 	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
120e06536a6SDarrick J. Wong 
121e06536a6SDarrick J. Wong 	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
122e06536a6SDarrick J. Wong 	afake->af_root = be32_to_cpu(ptr->s);
123e06536a6SDarrick J. Wong 	afake->af_levels += inc;
124e06536a6SDarrick J. Wong }
125e06536a6SDarrick J. Wong 
126e06536a6SDarrick J. Wong /*
 * Initialize an AG-rooted btree cursor with the given AG btree fake root.
128e06536a6SDarrick J. Wong  * The btree cursor's bc_ops will be overridden as needed to make the staging
129e06536a6SDarrick J. Wong  * functionality work.
130e06536a6SDarrick J. Wong  */
131e06536a6SDarrick J. Wong void
xfs_btree_stage_afakeroot(struct xfs_btree_cur * cur,struct xbtree_afakeroot * afake)132e06536a6SDarrick J. Wong xfs_btree_stage_afakeroot(
133e06536a6SDarrick J. Wong 	struct xfs_btree_cur		*cur,
134e06536a6SDarrick J. Wong 	struct xbtree_afakeroot		*afake)
135e06536a6SDarrick J. Wong {
136e06536a6SDarrick J. Wong 	struct xfs_btree_ops		*nops;
137e06536a6SDarrick J. Wong 
138e06536a6SDarrick J. Wong 	ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING));
139e06536a6SDarrick J. Wong 	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE));
140e06536a6SDarrick J. Wong 	ASSERT(cur->bc_tp == NULL);
141e06536a6SDarrick J. Wong 
142e06536a6SDarrick J. Wong 	nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
143e06536a6SDarrick J. Wong 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
144e06536a6SDarrick J. Wong 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
145e06536a6SDarrick J. Wong 	nops->free_block = xfs_btree_fakeroot_free_block;
146e06536a6SDarrick J. Wong 	nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur;
147e06536a6SDarrick J. Wong 	nops->set_root = xfs_btree_afakeroot_set_root;
148e06536a6SDarrick J. Wong 	nops->dup_cursor = xfs_btree_fakeroot_dup_cursor;
149e06536a6SDarrick J. Wong 
150e06536a6SDarrick J. Wong 	cur->bc_ag.afake = afake;
151e06536a6SDarrick J. Wong 	cur->bc_nlevels = afake->af_levels;
152e06536a6SDarrick J. Wong 	cur->bc_ops = nops;
153e06536a6SDarrick J. Wong 	cur->bc_flags |= XFS_BTREE_STAGING;
154e06536a6SDarrick J. Wong }
155e06536a6SDarrick J. Wong 
156e06536a6SDarrick J. Wong /*
157e06536a6SDarrick J. Wong  * Transform an AG-rooted staging btree cursor back into a regular cursor by
158e06536a6SDarrick J. Wong  * substituting a real btree root for the fake one and restoring normal btree
159e06536a6SDarrick J. Wong  * cursor ops.  The caller must log the btree root change prior to calling
160e06536a6SDarrick J. Wong  * this.
161e06536a6SDarrick J. Wong  */
void
xfs_btree_commit_afakeroot(
	struct xfs_btree_cur		*cur,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	const struct xfs_btree_ops	*ops)
{
	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(cur->bc_tp == NULL);

	trace_xfs_btree_commit_afakeroot(cur);

	/* Free the staging ops table allocated by xfs_btree_stage_afakeroot. */
	kmem_free((void *)cur->bc_ops);
	/* Point the cursor at the real AG header buffer and regular ops. */
	cur->bc_ag.agbp = agbp;
	cur->bc_ops = ops;
	cur->bc_flags &= ~XFS_BTREE_STAGING;
	cur->bc_tp = tp;
}
180349e1c03SDarrick J. Wong 
181349e1c03SDarrick J. Wong /*
182349e1c03SDarrick J. Wong  * Bulk Loading for Inode-Rooted Btrees
183349e1c03SDarrick J. Wong  * ====================================
184349e1c03SDarrick J. Wong  *
185349e1c03SDarrick J. Wong  * For a btree rooted in an inode fork, pass a xbtree_ifakeroot structure to
186349e1c03SDarrick J. Wong  * the staging cursor.  This structure should be initialized as follows:
187349e1c03SDarrick J. Wong  *
188349e1c03SDarrick J. Wong  * - if_fork_size field should be set to the number of bytes available to the
189349e1c03SDarrick J. Wong  *   fork in the inode.
190349e1c03SDarrick J. Wong  *
191349e1c03SDarrick J. Wong  * - if_fork should point to a freshly allocated struct xfs_ifork.
192349e1c03SDarrick J. Wong  *
193349e1c03SDarrick J. Wong  * - if_format should be set to the appropriate fork type (e.g.
194349e1c03SDarrick J. Wong  *   XFS_DINODE_FMT_BTREE).
195349e1c03SDarrick J. Wong  *
196349e1c03SDarrick J. Wong  * All other fields must be zero.
197349e1c03SDarrick J. Wong  *
198349e1c03SDarrick J. Wong  * The _stage_cursor() function for a specific btree type should call
199349e1c03SDarrick J. Wong  * xfs_btree_stage_ifakeroot to set up the in-memory cursor as a staging
200349e1c03SDarrick J. Wong  * cursor.  The corresponding _commit_staged_btree() function should log the
201349e1c03SDarrick J. Wong  * new root and call xfs_btree_commit_ifakeroot() to transform the staging
202349e1c03SDarrick J. Wong  * cursor into a regular btree cursor.
203349e1c03SDarrick J. Wong  */
204349e1c03SDarrick J. Wong 
205349e1c03SDarrick J. Wong /*
206349e1c03SDarrick J. Wong  * Initialize an inode-rooted btree cursor with the given inode btree fake
207349e1c03SDarrick J. Wong  * root.  The btree cursor's bc_ops will be overridden as needed to make the
208349e1c03SDarrick J. Wong  * staging functionality work.  If new_ops is not NULL, these new ops will be
209349e1c03SDarrick J. Wong  * passed out to the caller for further overriding.
210349e1c03SDarrick J. Wong  */
211349e1c03SDarrick J. Wong void
xfs_btree_stage_ifakeroot(struct xfs_btree_cur * cur,struct xbtree_ifakeroot * ifake,struct xfs_btree_ops ** new_ops)212349e1c03SDarrick J. Wong xfs_btree_stage_ifakeroot(
213349e1c03SDarrick J. Wong 	struct xfs_btree_cur		*cur,
214349e1c03SDarrick J. Wong 	struct xbtree_ifakeroot		*ifake,
215349e1c03SDarrick J. Wong 	struct xfs_btree_ops		**new_ops)
216349e1c03SDarrick J. Wong {
217349e1c03SDarrick J. Wong 	struct xfs_btree_ops		*nops;
218349e1c03SDarrick J. Wong 
219349e1c03SDarrick J. Wong 	ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING));
220349e1c03SDarrick J. Wong 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
221349e1c03SDarrick J. Wong 	ASSERT(cur->bc_tp == NULL);
222349e1c03SDarrick J. Wong 
223349e1c03SDarrick J. Wong 	nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
224349e1c03SDarrick J. Wong 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
225349e1c03SDarrick J. Wong 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
226349e1c03SDarrick J. Wong 	nops->free_block = xfs_btree_fakeroot_free_block;
227349e1c03SDarrick J. Wong 	nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur;
228349e1c03SDarrick J. Wong 	nops->dup_cursor = xfs_btree_fakeroot_dup_cursor;
229349e1c03SDarrick J. Wong 
230349e1c03SDarrick J. Wong 	cur->bc_ino.ifake = ifake;
231349e1c03SDarrick J. Wong 	cur->bc_nlevels = ifake->if_levels;
232349e1c03SDarrick J. Wong 	cur->bc_ops = nops;
233349e1c03SDarrick J. Wong 	cur->bc_flags |= XFS_BTREE_STAGING;
234349e1c03SDarrick J. Wong 
235349e1c03SDarrick J. Wong 	if (new_ops)
236349e1c03SDarrick J. Wong 		*new_ops = nops;
237349e1c03SDarrick J. Wong }
238349e1c03SDarrick J. Wong 
239349e1c03SDarrick J. Wong /*
240349e1c03SDarrick J. Wong  * Transform an inode-rooted staging btree cursor back into a regular cursor by
241349e1c03SDarrick J. Wong  * substituting a real btree root for the fake one and restoring normal btree
242349e1c03SDarrick J. Wong  * cursor ops.  The caller must log the btree root change prior to calling
243349e1c03SDarrick J. Wong  * this.
244349e1c03SDarrick J. Wong  */
void
xfs_btree_commit_ifakeroot(
	struct xfs_btree_cur		*cur,
	struct xfs_trans		*tp,
	int				whichfork,
	const struct xfs_btree_ops	*ops)
{
	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(cur->bc_tp == NULL);

	trace_xfs_btree_commit_ifakeroot(cur);

	/* Free the staging ops table allocated by xfs_btree_stage_ifakeroot. */
	kmem_free((void *)cur->bc_ops);
	/* Detach the fake root and point the cursor at the real fork. */
	cur->bc_ino.ifake = NULL;
	cur->bc_ino.whichfork = whichfork;
	cur->bc_ops = ops;
	cur->bc_flags &= ~XFS_BTREE_STAGING;
	cur->bc_tp = tp;
}
26460e3d707SDarrick J. Wong 
26560e3d707SDarrick J. Wong /*
26660e3d707SDarrick J. Wong  * Bulk Loading of Staged Btrees
26760e3d707SDarrick J. Wong  * =============================
26860e3d707SDarrick J. Wong  *
26960e3d707SDarrick J. Wong  * This interface is used with a staged btree cursor to create a totally new
27060e3d707SDarrick J. Wong  * btree with a large number of records (i.e. more than what would fit in a
27160e3d707SDarrick J. Wong  * single root block).  When the creation is complete, the new root can be
27260e3d707SDarrick J. Wong  * linked atomically into the filesystem by committing the staged cursor.
27360e3d707SDarrick J. Wong  *
27460e3d707SDarrick J. Wong  * Creation of a new btree proceeds roughly as follows:
27560e3d707SDarrick J. Wong  *
27660e3d707SDarrick J. Wong  * The first step is to initialize an appropriate fake btree root structure and
27760e3d707SDarrick J. Wong  * then construct a staged btree cursor.  Refer to the block comments about
27860e3d707SDarrick J. Wong  * "Bulk Loading for AG Btrees" and "Bulk Loading for Inode-Rooted Btrees" for
27960e3d707SDarrick J. Wong  * more information about how to do this.
28060e3d707SDarrick J. Wong  *
28160e3d707SDarrick J. Wong  * The second step is to initialize a struct xfs_btree_bload context as
28260e3d707SDarrick J. Wong  * documented in the structure definition.
28360e3d707SDarrick J. Wong  *
28460e3d707SDarrick J. Wong  * The third step is to call xfs_btree_bload_compute_geometry to compute the
28560e3d707SDarrick J. Wong  * height of and the number of blocks needed to construct the btree.  See the
28660e3d707SDarrick J. Wong  * section "Computing the Geometry of the New Btree" for details about this
28760e3d707SDarrick J. Wong  * computation.
28860e3d707SDarrick J. Wong  *
28960e3d707SDarrick J. Wong  * In step four, the caller must allocate xfs_btree_bload.nr_blocks blocks and
29060e3d707SDarrick J. Wong  * save them for later use by ->claim_block().  Bulk loading requires all
29160e3d707SDarrick J. Wong  * blocks to be allocated beforehand to avoid ENOSPC failures midway through a
29260e3d707SDarrick J. Wong  * rebuild, and to minimize seek distances of the new btree.
29360e3d707SDarrick J. Wong  *
29460e3d707SDarrick J. Wong  * Step five is to call xfs_btree_bload() to start constructing the btree.
29560e3d707SDarrick J. Wong  *
29660e3d707SDarrick J. Wong  * The final step is to commit the staging btree cursor, which logs the new
29760e3d707SDarrick J. Wong  * btree root and turns the staging cursor into a regular cursor.  The caller
29860e3d707SDarrick J. Wong  * is responsible for cleaning up the previous btree blocks, if any.
29960e3d707SDarrick J. Wong  *
30060e3d707SDarrick J. Wong  * Computing the Geometry of the New Btree
30160e3d707SDarrick J. Wong  * =======================================
30260e3d707SDarrick J. Wong  *
30360e3d707SDarrick J. Wong  * The number of items placed in each btree block is computed via the following
30460e3d707SDarrick J. Wong  * algorithm: For leaf levels, the number of items for the level is nr_records
30560e3d707SDarrick J. Wong  * in the bload structure.  For node levels, the number of items for the level
30660e3d707SDarrick J. Wong  * is the number of blocks in the next lower level of the tree.  For each
30760e3d707SDarrick J. Wong  * level, the desired number of items per block is defined as:
30860e3d707SDarrick J. Wong  *
30960e3d707SDarrick J. Wong  * desired = max(minrecs, maxrecs - slack factor)
31060e3d707SDarrick J. Wong  *
31160e3d707SDarrick J. Wong  * The number of blocks for the level is defined to be:
31260e3d707SDarrick J. Wong  *
31360e3d707SDarrick J. Wong  * blocks = floor(nr_items / desired)
31460e3d707SDarrick J. Wong  *
31560e3d707SDarrick J. Wong  * Note this is rounded down so that the npb calculation below will never fall
31660e3d707SDarrick J. Wong  * below minrecs.  The number of items that will actually be loaded into each
31760e3d707SDarrick J. Wong  * btree block is defined as:
31860e3d707SDarrick J. Wong  *
 * npb = nr_items / blocks
32060e3d707SDarrick J. Wong  *
32160e3d707SDarrick J. Wong  * Some of the leftmost blocks in the level will contain one extra record as
32260e3d707SDarrick J. Wong  * needed to handle uneven division.  If the number of records in any block
32360e3d707SDarrick J. Wong  * would exceed maxrecs for that level, blocks is incremented and npb is
32460e3d707SDarrick J. Wong  * recalculated.
32560e3d707SDarrick J. Wong  *
32660e3d707SDarrick J. Wong  * In other words, we compute the number of blocks needed to satisfy a given
32760e3d707SDarrick J. Wong  * loading level, then spread the items as evenly as possible.
32860e3d707SDarrick J. Wong  *
32960e3d707SDarrick J. Wong  * The height and number of fs blocks required to create the btree are computed
33060e3d707SDarrick J. Wong  * and returned via btree_height and nr_blocks.
33160e3d707SDarrick J. Wong  */
33260e3d707SDarrick J. Wong 
33360e3d707SDarrick J. Wong /*
33460e3d707SDarrick J. Wong  * Put a btree block that we're loading onto the ordered list and release it.
33560e3d707SDarrick J. Wong  * The btree blocks will be written to disk when bulk loading is finished.
33660e3d707SDarrick J. Wong  */
33760e3d707SDarrick J. Wong static void
xfs_btree_bload_drop_buf(struct list_head * buffers_list,struct xfs_buf ** bpp)33860e3d707SDarrick J. Wong xfs_btree_bload_drop_buf(
33960e3d707SDarrick J. Wong 	struct list_head	*buffers_list,
34060e3d707SDarrick J. Wong 	struct xfs_buf		**bpp)
34160e3d707SDarrick J. Wong {
34260e3d707SDarrick J. Wong 	if (*bpp == NULL)
34360e3d707SDarrick J. Wong 		return;
34460e3d707SDarrick J. Wong 
3451a48327cSDarrick J. Wong 	xfs_buf_delwri_queue_here(*bpp, buffers_list);
34660e3d707SDarrick J. Wong 	xfs_buf_relse(*bpp);
34760e3d707SDarrick J. Wong 	*bpp = NULL;
34860e3d707SDarrick J. Wong }
34960e3d707SDarrick J. Wong 
35060e3d707SDarrick J. Wong /*
35160e3d707SDarrick J. Wong  * Allocate and initialize one btree block for bulk loading.
35260e3d707SDarrick J. Wong  *
35360e3d707SDarrick J. Wong  * The new btree block will have its level and numrecs fields set to the values
35460e3d707SDarrick J. Wong  * of the level and nr_this_block parameters, respectively.
35560e3d707SDarrick J. Wong  *
35660e3d707SDarrick J. Wong  * The caller should ensure that ptrp, bpp, and blockp refer to the left
35760e3d707SDarrick J. Wong  * sibling of the new block, if there is any.  On exit, ptrp, bpp, and blockp
35860e3d707SDarrick J. Wong  * will all point to the new block.
35960e3d707SDarrick J. Wong  */
STATIC int
xfs_btree_bload_prep_block(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	struct list_head		*buffers_list,
	unsigned int			level,
	unsigned int			nr_this_block,
	union xfs_btree_ptr		*ptrp, /* in/out */
	struct xfs_buf			**bpp, /* in/out */
	struct xfs_btree_block		**blockp, /* in/out */
	void				*priv)
{
	union xfs_btree_ptr		new_ptr;
	struct xfs_buf			*new_bp;
	struct xfs_btree_block		*new_block;
	int				ret;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
		size_t			new_size;

		/*
		 * The root of an inode-rooted btree lives in the inode fork,
		 * not in a disk block, so there can be no left sibling here.
		 */
		ASSERT(*bpp == NULL);

		/* Allocate a new incore btree root block. */
		new_size = bbl->iroot_size(cur, nr_this_block, priv);
		ifp->if_broot = kmem_zalloc(new_size, 0);
		ifp->if_broot_bytes = (int)new_size;

		/* Initialize it and send it out. */
		xfs_btree_init_block_int(cur->bc_mp, ifp->if_broot,
				XFS_BUF_DADDR_NULL, cur->bc_btnum, level,
				nr_this_block, cur->bc_ino.ip->i_ino,
				cur->bc_flags);

		/* No buffer and a null ptr signal "root is in the fork". */
		*bpp = NULL;
		*blockp = ifp->if_broot;
		xfs_btree_set_ptr_null(cur, ptrp);
		return 0;
	}

	/* Claim one of the caller's preallocated blocks. */
	xfs_btree_set_ptr_null(cur, &new_ptr);
	ret = bbl->claim_block(cur, &new_ptr, priv);
	if (ret)
		return ret;

	ASSERT(!xfs_btree_ptr_is_null(cur, &new_ptr));

	ret = xfs_btree_get_buf_block(cur, &new_ptr, &new_block, &new_bp);
	if (ret)
		return ret;

	/*
	 * The previous block (if any) is the left sibling of the new block,
	 * so set its right sibling pointer to the new block and drop it.
	 */
	if (*blockp)
		xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB);
	xfs_btree_bload_drop_buf(buffers_list, bpp);

	/* Initialize the new btree block. */
	xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block);
	xfs_btree_set_sibling(cur, new_block, ptrp, XFS_BB_LEFTSIB);

	/* Set the out parameters. */
	*bpp = new_bp;
	*blockp = new_block;
	xfs_btree_copy_ptrs(cur, ptrp, &new_ptr, 1);
	return 0;
}
43160e3d707SDarrick J. Wong 
43260e3d707SDarrick J. Wong /* Load one leaf block. */
43360e3d707SDarrick J. Wong STATIC int
xfs_btree_bload_leaf(struct xfs_btree_cur * cur,unsigned int recs_this_block,xfs_btree_bload_get_record_fn get_record,struct xfs_btree_block * block,void * priv)43460e3d707SDarrick J. Wong xfs_btree_bload_leaf(
43560e3d707SDarrick J. Wong 	struct xfs_btree_cur		*cur,
43660e3d707SDarrick J. Wong 	unsigned int			recs_this_block,
43760e3d707SDarrick J. Wong 	xfs_btree_bload_get_record_fn	get_record,
43860e3d707SDarrick J. Wong 	struct xfs_btree_block		*block,
43960e3d707SDarrick J. Wong 	void				*priv)
44060e3d707SDarrick J. Wong {
44160e3d707SDarrick J. Wong 	unsigned int			j;
44260e3d707SDarrick J. Wong 	int				ret;
44360e3d707SDarrick J. Wong 
44460e3d707SDarrick J. Wong 	/* Fill the leaf block with records. */
44560e3d707SDarrick J. Wong 	for (j = 1; j <= recs_this_block; j++) {
44660e3d707SDarrick J. Wong 		union xfs_btree_rec	*block_rec;
44760e3d707SDarrick J. Wong 
44860e3d707SDarrick J. Wong 		ret = get_record(cur, priv);
44960e3d707SDarrick J. Wong 		if (ret)
45060e3d707SDarrick J. Wong 			return ret;
45160e3d707SDarrick J. Wong 		block_rec = xfs_btree_rec_addr(cur, j, block);
45260e3d707SDarrick J. Wong 		cur->bc_ops->init_rec_from_cur(cur, block_rec);
45360e3d707SDarrick J. Wong 	}
45460e3d707SDarrick J. Wong 
45560e3d707SDarrick J. Wong 	return 0;
45660e3d707SDarrick J. Wong }
45760e3d707SDarrick J. Wong 
45860e3d707SDarrick J. Wong /*
45960e3d707SDarrick J. Wong  * Load one node block with key/ptr pairs.
46060e3d707SDarrick J. Wong  *
46160e3d707SDarrick J. Wong  * child_ptr must point to a block within the next level down in the tree.  A
46260e3d707SDarrick J. Wong  * key/ptr entry will be created in the new node block to the block pointed to
46360e3d707SDarrick J. Wong  * by child_ptr.  On exit, child_ptr points to the next block on the child
46460e3d707SDarrick J. Wong  * level that needs processing.
46560e3d707SDarrick J. Wong  */
STATIC int
xfs_btree_bload_node(
	struct xfs_btree_cur	*cur,
	unsigned int		recs_this_block,
	union xfs_btree_ptr	*child_ptr,
	struct xfs_btree_block	*block)
{
	unsigned int		j;
	int			ret;

	/* Fill the node block with keys and pointers. */
	for (j = 1; j <= recs_this_block; j++) {
		union xfs_btree_key	child_key;
		union xfs_btree_ptr	*block_ptr;
		union xfs_btree_key	*block_key;
		struct xfs_btree_block	*child_block;
		struct xfs_buf		*child_bp;

		ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr));

		/* Read the current child block so we can copy its keys. */
		ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block,
				&child_bp);
		if (ret)
			return ret;

		/* Copy the child's pointer into slot j of this node. */
		block_ptr = xfs_btree_ptr_addr(cur, j, block);
		xfs_btree_copy_ptrs(cur, block_ptr, child_ptr, 1);

		/* Copy the child's key into slot j of this node. */
		block_key = xfs_btree_key_addr(cur, j, block);
		xfs_btree_get_keys(cur, child_block, &child_key);
		xfs_btree_copy_keys(cur, block_key, &child_key, 1);

		/* Advance child_ptr to the right sibling for the next slot. */
		xfs_btree_get_sibling(cur, child_block, child_ptr,
				XFS_BB_RIGHTSIB);
		xfs_buf_relse(child_bp);
	}

	return 0;
}
50560e3d707SDarrick J. Wong 
50660e3d707SDarrick J. Wong /*
50760e3d707SDarrick J. Wong  * Compute the maximum number of records (or keyptrs) per block that we want to
50860e3d707SDarrick J. Wong  * install at this level in the btree.  Caller is responsible for having set
50960e3d707SDarrick J. Wong  * @cur->bc_ino.forksize to the desired fork size, if appropriate.
51060e3d707SDarrick J. Wong  */
51160e3d707SDarrick J. Wong STATIC unsigned int
xfs_btree_bload_max_npb(struct xfs_btree_cur * cur,struct xfs_btree_bload * bbl,unsigned int level)51260e3d707SDarrick J. Wong xfs_btree_bload_max_npb(
51360e3d707SDarrick J. Wong 	struct xfs_btree_cur	*cur,
51460e3d707SDarrick J. Wong 	struct xfs_btree_bload	*bbl,
51560e3d707SDarrick J. Wong 	unsigned int		level)
51660e3d707SDarrick J. Wong {
51760e3d707SDarrick J. Wong 	unsigned int		ret;
51860e3d707SDarrick J. Wong 
51960e3d707SDarrick J. Wong 	if (level == cur->bc_nlevels - 1 && cur->bc_ops->get_dmaxrecs)
52060e3d707SDarrick J. Wong 		return cur->bc_ops->get_dmaxrecs(cur, level);
52160e3d707SDarrick J. Wong 
52260e3d707SDarrick J. Wong 	ret = cur->bc_ops->get_maxrecs(cur, level);
52360e3d707SDarrick J. Wong 	if (level == 0)
52460e3d707SDarrick J. Wong 		ret -= bbl->leaf_slack;
52560e3d707SDarrick J. Wong 	else
52660e3d707SDarrick J. Wong 		ret -= bbl->node_slack;
52760e3d707SDarrick J. Wong 	return ret;
52860e3d707SDarrick J. Wong }
52960e3d707SDarrick J. Wong 
53060e3d707SDarrick J. Wong /*
53160e3d707SDarrick J. Wong  * Compute the desired number of records (or keyptrs) per block that we want to
53260e3d707SDarrick J. Wong  * install at this level in the btree, which must be somewhere between minrecs
53360e3d707SDarrick J. Wong  * and max_npb.  The caller is free to install fewer records per block.
53460e3d707SDarrick J. Wong  */
53560e3d707SDarrick J. Wong STATIC unsigned int
xfs_btree_bload_desired_npb(struct xfs_btree_cur * cur,struct xfs_btree_bload * bbl,unsigned int level)53660e3d707SDarrick J. Wong xfs_btree_bload_desired_npb(
53760e3d707SDarrick J. Wong 	struct xfs_btree_cur	*cur,
53860e3d707SDarrick J. Wong 	struct xfs_btree_bload	*bbl,
53960e3d707SDarrick J. Wong 	unsigned int		level)
54060e3d707SDarrick J. Wong {
54160e3d707SDarrick J. Wong 	unsigned int		npb = xfs_btree_bload_max_npb(cur, bbl, level);
54260e3d707SDarrick J. Wong 
54360e3d707SDarrick J. Wong 	/* Root blocks are not subject to minrecs rules. */
54460e3d707SDarrick J. Wong 	if (level == cur->bc_nlevels - 1)
54560e3d707SDarrick J. Wong 		return max(1U, npb);
54660e3d707SDarrick J. Wong 
54760e3d707SDarrick J. Wong 	return max_t(unsigned int, cur->bc_ops->get_minrecs(cur, level), npb);
54860e3d707SDarrick J. Wong }
54960e3d707SDarrick J. Wong 
55060e3d707SDarrick J. Wong /*
55160e3d707SDarrick J. Wong  * Compute the number of records to be stored in each block at this level and
55260e3d707SDarrick J. Wong  * the number of blocks for this level.  For leaf levels, we must populate an
55360e3d707SDarrick J. Wong  * empty root block even if there are no records, so we have to have at least
55460e3d707SDarrick J. Wong  * one block.
55560e3d707SDarrick J. Wong  */
55660e3d707SDarrick J. Wong STATIC void
xfs_btree_bload_level_geometry(struct xfs_btree_cur * cur,struct xfs_btree_bload * bbl,unsigned int level,uint64_t nr_this_level,unsigned int * avg_per_block,uint64_t * blocks,uint64_t * blocks_with_extra)55760e3d707SDarrick J. Wong xfs_btree_bload_level_geometry(
55860e3d707SDarrick J. Wong 	struct xfs_btree_cur	*cur,
55960e3d707SDarrick J. Wong 	struct xfs_btree_bload	*bbl,
56060e3d707SDarrick J. Wong 	unsigned int		level,
56160e3d707SDarrick J. Wong 	uint64_t		nr_this_level,
56260e3d707SDarrick J. Wong 	unsigned int		*avg_per_block,
56360e3d707SDarrick J. Wong 	uint64_t		*blocks,
56460e3d707SDarrick J. Wong 	uint64_t		*blocks_with_extra)
56560e3d707SDarrick J. Wong {
56660e3d707SDarrick J. Wong 	uint64_t		npb;
56760e3d707SDarrick J. Wong 	uint64_t		dontcare;
56860e3d707SDarrick J. Wong 	unsigned int		desired_npb;
56960e3d707SDarrick J. Wong 	unsigned int		maxnr;
57060e3d707SDarrick J. Wong 
57160e3d707SDarrick J. Wong 	maxnr = cur->bc_ops->get_maxrecs(cur, level);
57260e3d707SDarrick J. Wong 
57360e3d707SDarrick J. Wong 	/*
57460e3d707SDarrick J. Wong 	 * Compute the number of blocks we need to fill each block with the
57560e3d707SDarrick J. Wong 	 * desired number of records/keyptrs per block.  Because desired_npb
57660e3d707SDarrick J. Wong 	 * could be minrecs, we use regular integer division (which rounds
57760e3d707SDarrick J. Wong 	 * the block count down) so that in the next step the effective # of
57860e3d707SDarrick J. Wong 	 * items per block will never be less than desired_npb.
57960e3d707SDarrick J. Wong 	 */
58060e3d707SDarrick J. Wong 	desired_npb = xfs_btree_bload_desired_npb(cur, bbl, level);
58160e3d707SDarrick J. Wong 	*blocks = div64_u64_rem(nr_this_level, desired_npb, &dontcare);
58260e3d707SDarrick J. Wong 	*blocks = max(1ULL, *blocks);
58360e3d707SDarrick J. Wong 
58460e3d707SDarrick J. Wong 	/*
58560e3d707SDarrick J. Wong 	 * Compute the number of records that we will actually put in each
58660e3d707SDarrick J. Wong 	 * block, assuming that we want to spread the records evenly between
58760e3d707SDarrick J. Wong 	 * the blocks.  Take care that the effective # of items per block (npb)
58860e3d707SDarrick J. Wong 	 * won't exceed maxrecs even for the blocks that get an extra record,
58960e3d707SDarrick J. Wong 	 * since desired_npb could be maxrecs, and in the previous step we
59060e3d707SDarrick J. Wong 	 * rounded the block count down.
59160e3d707SDarrick J. Wong 	 */
59260e3d707SDarrick J. Wong 	npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra);
59360e3d707SDarrick J. Wong 	if (npb > maxnr || (npb == maxnr && *blocks_with_extra > 0)) {
59460e3d707SDarrick J. Wong 		(*blocks)++;
59560e3d707SDarrick J. Wong 		npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra);
59660e3d707SDarrick J. Wong 	}
59760e3d707SDarrick J. Wong 
59860e3d707SDarrick J. Wong 	*avg_per_block = min_t(uint64_t, npb, nr_this_level);
59960e3d707SDarrick J. Wong 
60060e3d707SDarrick J. Wong 	trace_xfs_btree_bload_level_geometry(cur, level, nr_this_level,
60160e3d707SDarrick J. Wong 			*avg_per_block, desired_npb, *blocks,
60260e3d707SDarrick J. Wong 			*blocks_with_extra);
60360e3d707SDarrick J. Wong }
60460e3d707SDarrick J. Wong 
60560e3d707SDarrick J. Wong /*
60660e3d707SDarrick J. Wong  * Ensure a slack value is appropriate for the btree.
60760e3d707SDarrick J. Wong  *
60860e3d707SDarrick J. Wong  * If the slack value is negative, set slack so that we fill the block to
60960e3d707SDarrick J. Wong  * halfway between minrecs and maxrecs.  Make sure the slack is never so large
61060e3d707SDarrick J. Wong  * that we can underflow minrecs.
61160e3d707SDarrick J. Wong  */
61260e3d707SDarrick J. Wong static void
xfs_btree_bload_ensure_slack(struct xfs_btree_cur * cur,int * slack,int level)61360e3d707SDarrick J. Wong xfs_btree_bload_ensure_slack(
61460e3d707SDarrick J. Wong 	struct xfs_btree_cur	*cur,
61560e3d707SDarrick J. Wong 	int			*slack,
61660e3d707SDarrick J. Wong 	int			level)
61760e3d707SDarrick J. Wong {
61860e3d707SDarrick J. Wong 	int			maxr;
61960e3d707SDarrick J. Wong 	int			minr;
62060e3d707SDarrick J. Wong 
62160e3d707SDarrick J. Wong 	maxr = cur->bc_ops->get_maxrecs(cur, level);
62260e3d707SDarrick J. Wong 	minr = cur->bc_ops->get_minrecs(cur, level);
62360e3d707SDarrick J. Wong 
62460e3d707SDarrick J. Wong 	/*
62560e3d707SDarrick J. Wong 	 * If slack is negative, automatically set slack so that we load the
62660e3d707SDarrick J. Wong 	 * btree block approximately halfway between minrecs and maxrecs.
62760e3d707SDarrick J. Wong 	 * Generally, this will net us 75% loading.
62860e3d707SDarrick J. Wong 	 */
62960e3d707SDarrick J. Wong 	if (*slack < 0)
63060e3d707SDarrick J. Wong 		*slack = maxr - ((maxr + minr) >> 1);
63160e3d707SDarrick J. Wong 
63260e3d707SDarrick J. Wong 	*slack = min(*slack, maxr - minr);
63360e3d707SDarrick J. Wong }
63460e3d707SDarrick J. Wong 
63560e3d707SDarrick J. Wong /*
63660e3d707SDarrick J. Wong  * Prepare a btree cursor for a bulk load operation by computing the geometry
63760e3d707SDarrick J. Wong  * fields in bbl.  Caller must ensure that the btree cursor is a staging
63860e3d707SDarrick J. Wong  * cursor.  This function can be called multiple times.
63960e3d707SDarrick J. Wong  */
int
xfs_btree_bload_compute_geometry(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_bload	*bbl,
	uint64_t		nr_records)
{
	uint64_t		nr_blocks = 0;	/* running total across levels */
	uint64_t		nr_this_level;	/* items to store at this level */

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	/*
	 * Make sure that the slack values make sense for traditional leaf and
	 * node blocks.  Inode-rooted btrees will return different minrecs and
	 * maxrecs values for the root block (level == bc_nlevels - 1).  We're
	 * checking levels 0 and 1 here, so set bc_nlevels such that the btree
	 * code doesn't interpret either as the root level.
	 */
	cur->bc_nlevels = cur->bc_maxlevels - 1;
	xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0);
	xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1);

	/*
	 * Build the tree bottom-up: start with the leaf level holding all the
	 * records, then each pass promotes the block count of the level just
	 * laid out to become the item count of the level above it.
	 */
	bbl->nr_records = nr_this_level = nr_records;
	for (cur->bc_nlevels = 1; cur->bc_nlevels <= cur->bc_maxlevels;) {
		uint64_t	level_blocks;
		uint64_t	dontcare64;
		unsigned int	level = cur->bc_nlevels - 1;
		unsigned int	avg_per_block;

		xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
				&avg_per_block, &level_blocks, &dontcare64);

		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
			/*
			 * If all the items we want to store at this level
			 * would fit in the inode root block, then we have our
			 * btree root and are done.
			 *
			 * Note that bmap btrees forbid records in the root.
			 */
			if (level != 0 && nr_this_level <= avg_per_block) {
				nr_blocks++;
				break;
			}

			/*
			 * Otherwise, we have to store all the items for this
			 * level in traditional btree blocks and therefore need
			 * another level of btree to point to those blocks.
			 *
			 * We have to re-compute the geometry for each level of
			 * an inode-rooted btree because the geometry differs
			 * between a btree root in an inode fork and a
			 * traditional btree block.
			 *
			 * This distinction is made in the btree code based on
			 * whether level == bc_nlevels - 1.  Based on the
			 * previous root block size check against the root
			 * block geometry, we know that we aren't yet ready to
			 * populate the root.  Increment bc_nlevels and
			 * recalculate the geometry for a traditional
			 * block-based btree level.
			 */
			cur->bc_nlevels++;
			ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
			xfs_btree_bload_level_geometry(cur, bbl, level,
					nr_this_level, &avg_per_block,
					&level_blocks, &dontcare64);
		} else {
			/*
			 * If all the items we want to store at this level
			 * would fit in a single root block, we're done.
			 */
			if (nr_this_level <= avg_per_block) {
				nr_blocks++;
				break;
			}

			/* Otherwise, we need another level of btree. */
			cur->bc_nlevels++;
			ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
		}

		/* This level's blocks become the next level's items. */
		nr_blocks += level_blocks;
		nr_this_level = level_blocks;
	}

	if (cur->bc_nlevels > cur->bc_maxlevels)
		return -EOVERFLOW;

	bbl->btree_height = cur->bc_nlevels;
	/*
	 * The root of an inode-rooted btree lives in the inode fork, not in
	 * an on-disk block, so it doesn't count against nr_blocks.
	 */
	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
		bbl->nr_blocks = nr_blocks - 1;
	else
		bbl->nr_blocks = nr_blocks;
	return 0;
}
73760e3d707SDarrick J. Wong 
73860e3d707SDarrick J. Wong /* Bulk load a btree given the parameters and geometry established in bbl. */
int
xfs_btree_bload(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	void				*priv)
{
	struct list_head		buffers_list;
	union xfs_btree_ptr		child_ptr;
	union xfs_btree_ptr		ptr;
	struct xfs_buf			*bp = NULL;
	struct xfs_btree_block		*block = NULL;
	uint64_t			nr_this_level = bbl->nr_records;
	uint64_t			blocks;
	uint64_t			i;
	uint64_t			blocks_with_extra;
	uint64_t			total_blocks = 0;
	unsigned int			avg_per_block;
	unsigned int			level = 0;
	int				ret;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	/*
	 * New blocks are queued on buffers_list and flushed to disk in one
	 * delwri submission at the end.
	 */
	INIT_LIST_HEAD(&buffers_list);
	cur->bc_nlevels = bbl->btree_height;
	xfs_btree_set_ptr_null(cur, &child_ptr);
	xfs_btree_set_ptr_null(cur, &ptr);

	/* Recompute the leaf-level layout from the precomputed geometry. */
	xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
			&avg_per_block, &blocks, &blocks_with_extra);

	/* Load each leaf block. */
	for (i = 0; i < blocks; i++) {
		unsigned int		nr_this_block = avg_per_block;

		/*
		 * Due to rounding, btree blocks will not be evenly populated
		 * in most cases.  blocks_with_extra tells us how many blocks
		 * will receive an extra record to distribute the excess across
		 * the current level as evenly as possible.
		 */
		if (i < blocks_with_extra)
			nr_this_block++;

		ret = xfs_btree_bload_prep_block(cur, bbl, &buffers_list, level,
				nr_this_block, &ptr, &bp, &block, priv);
		if (ret)
			goto out;

		trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr,
				nr_this_block);

		/* Pull records from the caller via the get_record callback. */
		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record,
				block, priv);
		if (ret)
			goto out;

		/*
		 * Record the leftmost leaf pointer so we know where to start
		 * with the first node level.
		 */
		if (i == 0)
			xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1);
	}
	total_blocks += blocks;
	xfs_btree_bload_drop_buf(&buffers_list, &bp);

	/* Populate the internal btree nodes. */
	for (level = 1; level < cur->bc_nlevels; level++) {
		union xfs_btree_ptr	first_ptr;

		/* Each block of the level below becomes one keyptr here. */
		nr_this_level = blocks;
		block = NULL;
		xfs_btree_set_ptr_null(cur, &ptr);

		xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
				&avg_per_block, &blocks, &blocks_with_extra);

		/* Load each node block. */
		for (i = 0; i < blocks; i++) {
			unsigned int	nr_this_block = avg_per_block;

			/* Spread the rounding excess, as at the leaf level. */
			if (i < blocks_with_extra)
				nr_this_block++;

			ret = xfs_btree_bload_prep_block(cur, bbl,
					&buffers_list, level, nr_this_block,
					&ptr, &bp, &block, priv);
			if (ret)
				goto out;

			trace_xfs_btree_bload_block(cur, level, i, blocks,
					&ptr, nr_this_block);

			/* Fill keyptrs by walking the lower level's chain. */
			ret = xfs_btree_bload_node(cur, nr_this_block,
					&child_ptr, block);
			if (ret)
				goto out;

			/*
			 * Record the leftmost node pointer so that we know
			 * where to start the next node level above this one.
			 */
			if (i == 0)
				xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1);
		}
		total_blocks += blocks;
		xfs_btree_bload_drop_buf(&buffers_list, &bp);
		xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1);
	}

	/*
	 * Initialize the new root.  After the loops, ptr holds the last block
	 * written at the highest level, which is the root for an AG-rooted
	 * btree; an inode-rooted btree keeps its root in the inode fork, so
	 * the fake root records one fewer on-disk block.
	 */
	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
		ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
		cur->bc_ino.ifake->if_levels = cur->bc_nlevels;
		cur->bc_ino.ifake->if_blocks = total_blocks - 1;
	} else {
		cur->bc_ag.afake->af_root = be32_to_cpu(ptr.s);
		cur->bc_ag.afake->af_levels = cur->bc_nlevels;
		cur->bc_ag.afake->af_blocks = total_blocks;
	}

	/*
	 * Write the new blocks to disk.  If the ordered list isn't empty after
	 * that, then something went wrong and we have to fail.  This should
	 * never happen, but we'll check anyway.
	 */
	ret = xfs_buf_delwri_submit(&buffers_list);
	if (ret)
		goto out;
	if (!list_empty(&buffers_list)) {
		ASSERT(list_empty(&buffers_list));
		ret = -EIO;
	}

out:
	/* Drop any still-queued buffers and the last held buffer lock. */
	xfs_buf_delwri_cancel(&buffers_list);
	if (bp)
		xfs_buf_relse(bp);
	return ret;
}
879