// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"

/*
 * Large Arrays of Fixed-Size Records
 * ==================================
 *
 * This memory array uses an xfile (which itself is a memfd "file") to store
 * large numbers of fixed-size records in memory that can be paged out. This
 * puts less stress on the memory reclaim algorithms during an online repair
 * because we don't have to pin so much memory. However, array access is less
 * direct than would be in a regular memory array. Access to the array is
 * performed via indexed load and store methods, and an append method is
 * provided for convenience. Array elements can be unset, which sets them to
 * all zeroes. Unset entries are skipped during iteration, though direct loads
 * will return a zeroed buffer. Callers are responsible for concurrency
 * control.
 */
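
/*
 * Minimal usage sketch of this interface (illustrative only: the record type
 * "struct xrec" is a hypothetical placeholder and most error handling is
 * omitted). See xfarray_load_next() below for the iteration pattern.
 *
 *	struct xrec		rec = { .key = 42 };
 *	struct xfarray		*array;
 *	int			error;
 *
 *	error = xfarray_create("demo", 0, sizeof(struct xrec), &array);
 *	if (error)
 *		return error;
 *
 *	error = xfarray_append(array, &rec);	// add at the end of the array
 *	error = xfarray_load(array, 0, &rec);	// indexed load of element 0
 *	xfarray_destroy(array);
 */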

/*
 * Pointer to scratch space. Because we can't access the xfile data directly,
 * we allocate a small amount of memory on the end of the xfarray structure to
 * buffer array items when we need space to store values temporarily.
 */
static inline void *xfarray_scratch(struct xfarray *array)
{
	return (array + 1);
}

/* Compute array index given an xfile offset. */
static xfarray_idx_t
xfarray_idx(
	struct xfarray	*array,
	loff_t		pos)
{
	if (array->obj_size_log >= 0)
		return (xfarray_idx_t)pos >> array->obj_size_log;

	return div_u64((xfarray_idx_t)pos, array->obj_size);
}

/* Compute xfile offset of array element. */
static inline loff_t xfarray_pos(struct xfarray *array, xfarray_idx_t idx)
{
	if (array->obj_size_log >= 0)
		return idx << array->obj_size_log;

	return idx * array->obj_size;
}

/*
 * Initialize a big memory array. Array records cannot be larger than a
 * page, and the array cannot span more bytes than the page cache supports.
 * If @required_capacity is nonzero, the maximum array size will be set to this
 * quantity and the array creation will fail if the underlying storage cannot
 * support that many records.
 */
int
xfarray_create(
	const char		*description,
	unsigned long long	required_capacity,
	size_t			obj_size,
	struct xfarray		**arrayp)
{
	struct xfarray		*array;
	struct xfile		*xfile;
	int			error;

	ASSERT(obj_size < PAGE_SIZE);

	error = xfile_create(description, 0, &xfile);
	if (error)
		return error;

	error = -ENOMEM;
	array = kzalloc(sizeof(struct xfarray) + obj_size, XCHK_GFP_FLAGS);
	if (!array)
		goto out_xfile;

	array->xfile = xfile;
	array->obj_size = obj_size;

	if (is_power_of_2(obj_size))
		array->obj_size_log = ilog2(obj_size);
	else
		array->obj_size_log = -1;

	array->max_nr = xfarray_idx(array, MAX_LFS_FILESIZE);
	trace_xfarray_create(array, required_capacity);

	if (required_capacity > 0) {
		if (array->max_nr < required_capacity) {
			error = -ENOMEM;
			goto out_xfarray;
		}
		array->max_nr = required_capacity;
	}

	*arrayp = array;
	return 0;

out_xfarray:
	kfree(array);
out_xfile:
	xfile_destroy(xfile);
	return error;
}

/* Destroy the array. */
void
xfarray_destroy(
	struct xfarray	*array)
{
	xfile_destroy(array->xfile);
	kfree(array);
}

/* Load an element from the array. */
int
xfarray_load(
	struct xfarray	*array,
	xfarray_idx_t	idx,
	void		*ptr)
{
	if (idx >= array->nr)
		return -ENODATA;

	return xfile_obj_load(array->xfile, ptr, array->obj_size,
			xfarray_pos(array, idx));
}

/* Is this array element potentially unset? */
static inline bool
xfarray_is_unset(
	struct xfarray	*array,
	loff_t		pos)
{
	void		*temp = xfarray_scratch(array);
	int		error;

	if (array->unset_slots == 0)
		return false;

	error = xfile_obj_load(array->xfile, temp, array->obj_size, pos);
	if (!error && xfarray_element_is_null(array, temp))
		return true;

	return false;
}

/*
 * Unset an array element. If @idx is the last element in the array, the
 * array will be truncated. Otherwise, the entry will be zeroed.
 */
int
xfarray_unset(
	struct xfarray	*array,
	xfarray_idx_t	idx)
{
	void		*temp = xfarray_scratch(array);
	loff_t		pos = xfarray_pos(array, idx);
	int		error;

	if (idx >= array->nr)
		return -ENODATA;

	if (idx == array->nr - 1) {
		array->nr--;
		return 0;
	}

	if (xfarray_is_unset(array, pos))
		return 0;

	memset(temp, 0, array->obj_size);
	error = xfile_obj_store(array->xfile, temp, array->obj_size, pos);
	if (error)
		return error;

	array->unset_slots++;
	return 0;
}

/*
 * Store an element in the array. The element must not be completely zeroed,
 * because those are considered unset sparse elements.
 */
int
xfarray_store(
	struct xfarray	*array,
	xfarray_idx_t	idx,
	const void	*ptr)
{
	int		ret;

	if (idx >= array->max_nr)
		return -EFBIG;

	ASSERT(!xfarray_element_is_null(array, ptr));

	ret = xfile_obj_store(array->xfile, ptr, array->obj_size,
			xfarray_pos(array, idx));
	if (ret)
		return ret;

	array->nr = max(array->nr, idx + 1);
	return 0;
}

/* Is this array element NULL? */
bool
xfarray_element_is_null(
	struct xfarray	*array,
	const void	*ptr)
{
	return !memchr_inv(ptr, 0, array->obj_size);
}

/*
 * Store an element anywhere in the array that is unset. If there are no
 * unset slots, append the element to the array.
 */
int
xfarray_store_anywhere(
	struct xfarray	*array,
	const void	*ptr)
{
	void		*temp = xfarray_scratch(array);
	loff_t		endpos = xfarray_pos(array, array->nr);
	loff_t		pos;
	int		error;

	/* Find an unset slot to put it in. */
	for (pos = 0;
	     pos < endpos && array->unset_slots > 0;
	     pos += array->obj_size) {
		error = xfile_obj_load(array->xfile, temp, array->obj_size,
				pos);
		if (error || !xfarray_element_is_null(array, temp))
			continue;

		error = xfile_obj_store(array->xfile, ptr, array->obj_size,
				pos);
		if (error)
			return error;

		array->unset_slots--;
		return 0;
	}

	/* No unset slots found; attach it on the end. */
	array->unset_slots = 0;
	return xfarray_append(array, ptr);
}
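
/*
 * Illustrative note (hypothetical record variable, error handling omitted):
 * unsetting a middle element leaves a zeroed slot that a later
 * xfarray_store_anywhere() call may reuse instead of growing the array:
 *
 *	xfarray_unset(array, 3);		// zero slot 3, count it as unset
 *	xfarray_store_anywhere(array, &rec);	// may fill slot 3 rather than append
 */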

/* Return length of array. */
uint64_t
xfarray_length(
	struct xfarray	*array)
{
	return array->nr;
}

/*
 * Decide which array item we're going to read as part of an _iter_get.
 * @cur is the array index, and @pos is the file offset of that array index in
 * the backing xfile. Returns ENODATA if we reach the end of the records.
 *
 * Reading from a hole in a sparse xfile causes page instantiation, so for
 * iterating a (possibly sparse) array we need to figure out if the cursor is
 * pointing at a totally uninitialized hole and move the cursor up if
 * necessary.
 */
static inline int
xfarray_find_data(
	struct xfarray	*array,
	xfarray_idx_t	*cur,
	loff_t		*pos)
{
	unsigned int	pgoff = offset_in_page(*pos);
	loff_t		end_pos = *pos + array->obj_size - 1;
	loff_t		new_pos;

	/*
	 * If the current array record is not adjacent to a page boundary, we
	 * are in the middle of the page. We do not need to move the cursor.
	 */
	if (pgoff != 0 && pgoff + array->obj_size - 1 < PAGE_SIZE)
		return 0;

	/*
	 * Call SEEK_DATA on the last byte in the record we're about to read.
	 * If the record ends at (or crosses) the end of a page then we know
	 * that the first byte of the record is backed by pages and don't need
	 * to query it. If instead the record begins at the start of the page
	 * then we know that querying the last byte is just as good as querying
	 * the first byte, since records cannot be larger than a page.
	 *
	 * If the call returns the same file offset, we know this record is
	 * backed by real pages. We do not need to move the cursor.
	 */
	new_pos = xfile_seek_data(array->xfile, end_pos);
	if (new_pos == -ENXIO)
		return -ENODATA;
	if (new_pos < 0)
		return new_pos;
	if (new_pos == end_pos)
		return 0;

	/*
	 * Otherwise, SEEK_DATA told us how far up to move the file pointer to
	 * find more data. Move the array index to the first record past the
	 * byte offset we were given.
	 */
	new_pos = roundup_64(new_pos, array->obj_size);
	*cur = xfarray_idx(array, new_pos);
	*pos = xfarray_pos(array, *cur);
	return 0;
}
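
/*
 * Worked example of the hole-skipping logic above (illustrative numbers only,
 * assuming PAGE_SIZE == 4096 and a 16-byte record size): a cursor at byte
 * offset 8192 touches a page boundary, so we SEEK_DATA on byte 8207. If pages
 * two and three are holes and data resumes at byte 16384, SEEK_DATA returns
 * 16384; rounding that up to the record size still gives 16384, so the cursor
 * jumps to array index 1024 without instantiating either hole page.
 */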

/*
 * Starting at *idx, fetch the next non-null array entry and advance the index
 * to set up the next _load_next call. Returns ENODATA if we reach the end of
 * the array. Callers must set @*idx to XFARRAY_CURSOR_INIT before the first
 * call to this function.
 */
int
xfarray_load_next(
	struct xfarray	*array,
	xfarray_idx_t	*idx,
	void		*rec)
{
	xfarray_idx_t	cur = *idx;
	loff_t		pos = xfarray_pos(array, cur);
	int		error;

	do {
		if (cur >= array->nr)
			return -ENODATA;

		/*
		 * Ask the backing store for the location of next possible
		 * written record, then retrieve that record.
		 */
		error = xfarray_find_data(array, &cur, &pos);
		if (error)
			return error;
		error = xfarray_load(array, cur, rec);
		if (error)
			return error;

		cur++;
		pos += array->obj_size;
	} while (xfarray_element_is_null(array, rec));

	*idx = cur;
	return 0;
}
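
/*
 * Minimal iteration sketch for the function above (record type hypothetical,
 * error handling abbreviated):
 *
 *	xfarray_idx_t	cur = XFARRAY_CURSOR_INIT;
 *
 *	while ((error = xfarray_load_next(array, &cur, &rec)) == 0) {
 *		// process @rec; all-zero (unset) entries were skipped
 *	}
 *	if (error == -ENODATA)
 *		error = 0;	// walked off the end of the array
 */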

/* Sorting functions */

#ifdef DEBUG
# define xfarray_sort_bump_loads(si)	do { (si)->loads++; } while (0)
# define xfarray_sort_bump_stores(si)	do { (si)->stores++; } while (0)
# define xfarray_sort_bump_compares(si)	do { (si)->compares++; } while (0)
# define xfarray_sort_bump_heapsorts(si) do { (si)->heapsorts++; } while (0)
#else
# define xfarray_sort_bump_loads(si)
# define xfarray_sort_bump_stores(si)
# define xfarray_sort_bump_compares(si)
# define xfarray_sort_bump_heapsorts(si)
#endif /* DEBUG */

/* Load an array element for sorting. */
static inline int
xfarray_sort_load(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		idx,
	void			*ptr)
{
	xfarray_sort_bump_loads(si);
	return xfarray_load(si->array, idx, ptr);
}

/* Store an array element for sorting. */
static inline int
xfarray_sort_store(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		idx,
	void			*ptr)
{
	xfarray_sort_bump_stores(si);
	return xfarray_store(si->array, idx, ptr);
}

/* Compare an array element for sorting. */
static inline int
xfarray_sort_cmp(
	struct xfarray_sortinfo	*si,
	const void		*a,
	const void		*b)
{
	xfarray_sort_bump_compares(si);
	return si->cmp_fn(a, b);
}

/* Return a pointer to the low index stack for quicksort partitioning. */
static inline xfarray_idx_t *xfarray_sortinfo_lo(struct xfarray_sortinfo *si)
{
	return (xfarray_idx_t *)(si + 1);
}

/* Return a pointer to the high index stack for quicksort partitioning. */
static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si)
{
	return xfarray_sortinfo_lo(si) + si->max_stack_depth;
}

/* Size of each element in the quicksort pivot array. */
static inline size_t
xfarray_pivot_rec_sz(
	struct xfarray	*array)
{
	return round_up(array->obj_size, 8) + sizeof(xfarray_idx_t);
}
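
/*
 * Layout sketch of a single pivot array element, assuming a hypothetical
 * 20-byte xfarray record: the record is padded up to the next multiple of
 * eight (24 bytes) and the 64-bit xfarray_idx_t is stored at the end, giving
 * a 32-byte pivot element:
 *
 *	[ 20-byte record | 4 bytes pad | 8-byte xfarray_idx_t ]
 */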

/* Allocate memory to handle the sort. */
static inline int
xfarray_sortinfo_alloc(
	struct xfarray		*array,
	xfarray_cmp_fn		cmp_fn,
	unsigned int		flags,
	struct xfarray_sortinfo	**infop)
{
	struct xfarray_sortinfo	*si;
	size_t			nr_bytes = sizeof(struct xfarray_sortinfo);
	size_t			pivot_rec_sz = xfarray_pivot_rec_sz(array);
	int			max_stack_depth;

	/*
	 * The median-of-nine pivot algorithm doesn't work if a subset has
	 * fewer than 9 items. Make sure the in-memory sort will always take
	 * over for subsets where this wouldn't be the case.
	 */
	BUILD_BUG_ON(XFARRAY_QSORT_PIVOT_NR >= XFARRAY_ISORT_NR);

	/*
	 * Tail-call recursion during the partitioning phase means that
	 * quicksort will never recurse more than log2(nr) times. We need one
	 * extra level of stack to hold the initial parameters. In-memory
	 * sort will always take care of the last few levels of recursion for
	 * us, so we can reduce the stack depth by that much.
	 */
	max_stack_depth = ilog2(array->nr) + 1 - (XFARRAY_ISORT_SHIFT - 1);
	if (max_stack_depth < 1)
		max_stack_depth = 1;

	/* Each level of quicksort uses a lo and a hi index */
	nr_bytes += max_stack_depth * sizeof(xfarray_idx_t) * 2;

	/* Scratchpad for in-memory sort, or finding the pivot */
	nr_bytes += max_t(size_t,
			(XFARRAY_QSORT_PIVOT_NR + 1) * pivot_rec_sz,
			XFARRAY_ISORT_NR * array->obj_size);

	si = kvzalloc(nr_bytes, XCHK_GFP_FLAGS);
	if (!si)
		return -ENOMEM;

	si->array = array;
	si->cmp_fn = cmp_fn;
	si->flags = flags;
	si->max_stack_depth = max_stack_depth;
	si->max_stack_used = 1;

	xfarray_sortinfo_lo(si)[0] = 0;
	xfarray_sortinfo_hi(si)[0] = array->nr - 1;

	trace_xfarray_sort(si, nr_bytes);
	*infop = si;
	return 0;
}
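
/*
 * Worked sizing example for the allocation above, assuming the current
 * scrub/xfarray.h constants (a 16-record in-memory sort threshold, i.e.
 * XFARRAY_ISORT_SHIFT == 4, and XFARRAY_QSORT_PIVOT_NR == 9): for one million
 * records, max_stack_depth = ilog2(1000000) + 1 - 3 = 17, so the lo/hi index
 * stacks need 17 * 2 * sizeof(xfarray_idx_t) = 272 bytes, and the scratchpad
 * is sized to the larger of ten pivot elements or sixteen records.
 */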

/* Should this sort be terminated by a fatal signal? */
static inline bool
xfarray_sort_terminated(
	struct xfarray_sortinfo	*si,
	int			*error)
{
	/*
	 * If preemption is disabled, we need to yield to the scheduler every
	 * few seconds so that we don't run afoul of the soft lockup watchdog
	 * or RCU stall detector.
	 */
	cond_resched();

	if ((si->flags & XFARRAY_SORT_KILLABLE) &&
	    fatal_signal_pending(current)) {
		if (*error == 0)
			*error = -EINTR;
		return true;
	}
	return false;
}

/* Do we want an in-memory sort? */
static inline bool
xfarray_want_isort(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		start,
	xfarray_idx_t		end)
{
	/*
	 * For array subsets that fit in the scratchpad, it's much faster to
	 * use the kernel's heapsort than quicksort's stack machine.
	 */
	return (end - start) < XFARRAY_ISORT_NR;
}

/* Return the scratch space within the sortinfo structure. */
static inline void *xfarray_sortinfo_isort_scratch(struct xfarray_sortinfo *si)
{
	return xfarray_sortinfo_hi(si) + si->max_stack_depth;
}

/*
 * Sort a small number of array records using scratchpad memory. The records
 * need not be contiguous in the xfile's memory pages.
 */
STATIC int
xfarray_isort(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		lo,
	xfarray_idx_t		hi)
{
	void			*scratch = xfarray_sortinfo_isort_scratch(si);
	loff_t			lo_pos = xfarray_pos(si->array, lo);
	loff_t			len = xfarray_pos(si->array, hi - lo + 1);
	int			error;

	trace_xfarray_isort(si, lo, hi);

	xfarray_sort_bump_loads(si);
	error = xfile_obj_load(si->array->xfile, scratch, len, lo_pos);
	if (error)
		return error;

	xfarray_sort_bump_heapsorts(si);
	sort(scratch, hi - lo + 1, si->array->obj_size, si->cmp_fn, NULL);

	xfarray_sort_bump_stores(si);
	return xfile_obj_store(si->array->xfile, scratch, len, lo_pos);
}

/* Grab a page for sorting records. */
static inline int
xfarray_sort_get_page(
	struct xfarray_sortinfo	*si,
	loff_t			pos,
	uint64_t		len)
{
	int			error;

	error = xfile_get_page(si->array->xfile, pos, len, &si->xfpage);
	if (error)
		return error;

	/*
	 * xfile pages must never be mapped into userspace, so we skip the
	 * dcache flush when mapping the page.
	 */
	si->page_kaddr = kmap_local_page(si->xfpage.page);
	return 0;
}

/* Release a page we grabbed for sorting records. */
static inline int
xfarray_sort_put_page(
	struct xfarray_sortinfo	*si)
{
	if (!si->page_kaddr)
		return 0;

	kunmap_local(si->page_kaddr);
	si->page_kaddr = NULL;

	return xfile_put_page(si->array->xfile, &si->xfpage);
}

/* Decide if these records are eligible for in-page sorting. */
static inline bool
xfarray_want_pagesort(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		lo,
	xfarray_idx_t		hi)
{
	pgoff_t			lo_page;
	pgoff_t			hi_page;
	loff_t			end_pos;

	/* We can only map one page at a time. */
	lo_page = xfarray_pos(si->array, lo) >> PAGE_SHIFT;
	end_pos = xfarray_pos(si->array, hi) + si->array->obj_size - 1;
	hi_page = end_pos >> PAGE_SHIFT;

	return lo_page == hi_page;
}

/* Sort a bunch of records that all live in the same memory page. */
STATIC int
xfarray_pagesort(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		lo,
	xfarray_idx_t		hi)
{
	void			*startp;
	loff_t			lo_pos = xfarray_pos(si->array, lo);
	uint64_t		len = xfarray_pos(si->array, hi - lo);
	int			error = 0;

	trace_xfarray_pagesort(si, lo, hi);

	xfarray_sort_bump_loads(si);
	error = xfarray_sort_get_page(si, lo_pos, len);
	if (error)
		return error;

	xfarray_sort_bump_heapsorts(si);
	startp = si->page_kaddr + offset_in_page(lo_pos);
	sort(startp, hi - lo + 1, si->array->obj_size, si->cmp_fn, NULL);

	xfarray_sort_bump_stores(si);
	return xfarray_sort_put_page(si);
}

/* Return a pointer to the xfarray pivot record within the sortinfo struct. */
static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
{
	return xfarray_sortinfo_hi(si) + si->max_stack_depth;
}

/* Return a pointer to the start of the pivot array. */
static inline void *
xfarray_sortinfo_pivot_array(
	struct xfarray_sortinfo	*si)
{
	return xfarray_sortinfo_pivot(si) + si->array->obj_size;
}

/* The xfarray record is stored at the start of each pivot array element. */
static inline void *
xfarray_pivot_array_rec(
	void			*pa,
	size_t			pa_recsz,
	unsigned int		pa_idx)
{
	return pa + (pa_recsz * pa_idx);
}

/* The xfarray index is stored at the end of each pivot array element. */
static inline xfarray_idx_t *
xfarray_pivot_array_idx(
	void			*pa,
	size_t			pa_recsz,
	unsigned int		pa_idx)
{
	return xfarray_pivot_array_rec(pa, pa_recsz, pa_idx + 1) -
			sizeof(xfarray_idx_t);
}

/*
 * Find a pivot value for quicksort partitioning, swap it with a[lo], and save
 * the cached pivot record for the next step.
 *
 * Load evenly-spaced records within the given range into memory, sort them,
 * and choose the pivot from the median record. Using multiple points will
 * improve the quality of the pivot selection, and hopefully avoid the worst
 * quicksort behavior, since our array values are nearly always evenly sorted.
 */
STATIC int
xfarray_qsort_pivot(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		lo,
	xfarray_idx_t		hi)
{
	void			*pivot = xfarray_sortinfo_pivot(si);
	void			*parray = xfarray_sortinfo_pivot_array(si);
	void			*recp;
	xfarray_idx_t		*idxp;
	xfarray_idx_t		step = (hi - lo) / (XFARRAY_QSORT_PIVOT_NR - 1);
	size_t			pivot_rec_sz = xfarray_pivot_rec_sz(si->array);
	int			i, j;
	int			error;

	ASSERT(step > 0);

	/*
	 * Load the xfarray indexes of the records we intend to sample into the
	 * pivot array.
	 */
	idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, 0);
	*idxp = lo;
	for (i = 1; i < XFARRAY_QSORT_PIVOT_NR - 1; i++) {
		idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
		*idxp = lo + (i * step);
	}
	idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
			XFARRAY_QSORT_PIVOT_NR - 1);
	*idxp = hi;

	/* Load the selected xfarray records into the pivot array. */
	for (i = 0; i < XFARRAY_QSORT_PIVOT_NR; i++) {
		xfarray_idx_t	idx;

		recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, i);
		idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);

		/* No unset records; load directly into the array. */
		if (likely(si->array->unset_slots == 0)) {
			error = xfarray_sort_load(si, *idxp, recp);
			if (error)
				return error;
			continue;
		}

		/*
		 * Load non-null records into the scratchpad without changing
		 * the xfarray_idx_t in the pivot array.
		 */
		idx = *idxp;
		xfarray_sort_bump_loads(si);
		error = xfarray_load_next(si->array, &idx, recp);
		if (error)
			return error;
	}

	xfarray_sort_bump_heapsorts(si);
	sort(parray, XFARRAY_QSORT_PIVOT_NR, pivot_rec_sz, si->cmp_fn, NULL);

	/*
	 * We sorted the pivot array records (which includes the xfarray
	 * indices) in xfarray record order. The median element of the pivot
	 * array contains the xfarray record that we will use as the pivot.
	 * Copy that xfarray record to the designated space.
	 */
	recp = xfarray_pivot_array_rec(parray, pivot_rec_sz,
			XFARRAY_QSORT_PIVOT_NR / 2);
	memcpy(pivot, recp, si->array->obj_size);

	/* If the pivot record we chose was already in a[lo] then we're done. */
	idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
			XFARRAY_QSORT_PIVOT_NR / 2);
	if (*idxp == lo)
		return 0;

	/*
	 * Find the cached copy of a[lo] in the pivot array so that we can swap
	 * a[lo] and a[pivot].
	 */
	for (i = 0, j = -1; i < XFARRAY_QSORT_PIVOT_NR; i++) {
		idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
		if (*idxp == lo)
			j = i;
	}
	if (j < 0) {
		ASSERT(j >= 0);
		return -EFSCORRUPTED;
	}

	/* Swap a[lo] and a[pivot]. */
	error = xfarray_sort_store(si, lo, pivot);
	if (error)
		return error;

	recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, j);
	idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
			XFARRAY_QSORT_PIVOT_NR / 2);
	return xfarray_sort_store(si, *idxp, recp);
}

/*
 * Set up the pointers for the next iteration. We push onto the stack all of
 * the unsorted values between a[lo + 1] and a[end[i]], and we tweak the
 * current stack frame to point to the unsorted values between a[beg[i]] and
 * a[lo] so that those values will be sorted when we pop the stack.
 */
static inline int
xfarray_qsort_push(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		*si_lo,
	xfarray_idx_t		*si_hi,
	xfarray_idx_t		lo,
	xfarray_idx_t		hi)
{
	/* Check for stack overflows */
	if (si->stack_depth >= si->max_stack_depth - 1) {
		ASSERT(si->stack_depth < si->max_stack_depth - 1);
		return -EFSCORRUPTED;
	}

	si->max_stack_used = max_t(uint8_t, si->max_stack_used,
			si->stack_depth + 2);

	si_lo[si->stack_depth + 1] = lo + 1;
	si_hi[si->stack_depth + 1] = si_hi[si->stack_depth];
	si_hi[si->stack_depth++] = lo - 1;

	/*
	 * Always start with the smaller of the two partitions to keep the
	 * amount of recursion in check.
	 */
	if (si_hi[si->stack_depth] - si_lo[si->stack_depth] >
	    si_hi[si->stack_depth - 1] - si_lo[si->stack_depth - 1]) {
		swap(si_lo[si->stack_depth], si_lo[si->stack_depth - 1]);
		swap(si_hi[si->stack_depth], si_hi[si->stack_depth - 1]);
	}

	return 0;
}
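
/*
 * Worked example of the push above (illustrative indices): if the current
 * frame covers a[0..99] and the pivot settles at lo == 20, the new top frame
 * becomes a[21..99] and the current frame shrinks to a[0..19]. Because the
 * 79-record range is larger than the 20-record one, the two frames are
 * swapped so that the smaller range is partitioned first, which is what
 * bounds the stack depth at roughly log2(nr).
 */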

/*
 * Load an element from the array into the first scratchpad and cache the page,
 * if possible.
 */
static inline int
xfarray_sort_load_cached(
	struct xfarray_sortinfo	*si,
	xfarray_idx_t		idx,
	void			*ptr)
{
	loff_t			idx_pos = xfarray_pos(si->array, idx);
	pgoff_t			startpage;
	pgoff_t			endpage;
	int			error = 0;

	/*
	 * If this load would split a page, release the cached page, if any,
	 * and perform a traditional read.
	 */
	startpage = idx_pos >> PAGE_SHIFT;
	endpage = (idx_pos + si->array->obj_size - 1) >> PAGE_SHIFT;
	if (startpage != endpage) {
		error = xfarray_sort_put_page(si);
		if (error)
			return error;

		if (xfarray_sort_terminated(si, &error))
			return error;

		return xfile_obj_load(si->array->xfile, ptr,
				si->array->obj_size, idx_pos);
	}

	/* If the cached page is not the one we want, release it. */
	if (xfile_page_cached(&si->xfpage) &&
	    xfile_page_index(&si->xfpage) != startpage) {
		error = xfarray_sort_put_page(si);
		if (error)
			return error;
	}

	/*
	 * If we don't have a cached page (and we know the load is contained
	 * in a single page) then grab it.
	 */
	if (!xfile_page_cached(&si->xfpage)) {
		if (xfarray_sort_terminated(si, &error))
			return error;

		error = xfarray_sort_get_page(si, startpage << PAGE_SHIFT,
				PAGE_SIZE);
		if (error)
			return error;
	}

	memcpy(ptr, si->page_kaddr + offset_in_page(idx_pos),
			si->array->obj_size);
	return 0;
}

/*
 * Sort the array elements via quicksort. This implementation incorporates
 * four optimizations discussed in Sedgewick:
 *
 * 1. Use an explicit stack of array indices to store the next array partition
 *    to sort. This helps us to avoid recursion in the call stack, which is
 *    particularly expensive in the kernel.
 *
 * 2. For arrays with records in arbitrary or user-controlled order, choose the
 *    pivot element using a median-of-nine decision tree. This reduces the
 *    probability of selecting a bad pivot value which causes worst case
 *    behavior (i.e. partition sizes of 1).
 *
 * 3. The smaller of the two sub-partitions is pushed onto the stack to start
 *    the next level of recursion, and the larger sub-partition replaces the
 *    current stack frame. This guarantees that we won't need more than
 *    log2(nr) stack space.
 *
 * 4. For small sets, load the records into the scratchpad and run heapsort on
 *    them because that is very fast. In the author's experience, this yields
 *    a ~10% reduction in runtime.
 *
 *    If a small set is contained entirely within a single xfile memory page,
 *    map the page directly and run heap sort directly on the xfile page
 *    instead of using the load/store interface. This halves the runtime.
 *
 * 5. This optimization is specific to the implementation. When converging lo
 *    and hi after selecting a pivot, we will try to retain the xfile memory
 *    page between load calls, which reduces run time by 50%.
 */
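
/*
 * Illustrative usage sketch for the sort entry point below (the record type
 * and comparison logic are hypothetical; as with the kernel's sort(), the
 * comparator receives pointers to two records):
 *
 *	static int
 *	xrec_cmp(const void *a, const void *b)
 *	{
 *		const struct xrec	*ra = a;
 *		const struct xrec	*rb = b;
 *
 *		if (ra->key > rb->key)
 *			return 1;
 *		if (ra->key < rb->key)
 *			return -1;
 *		return 0;
 *	}
 *
 *	error = xfarray_sort(array, xrec_cmp, XFARRAY_SORT_KILLABLE);
 */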

/*
 * Due to the use of signed indices, we can only support up to 2^63 records.
 * Files can only grow to 2^63 bytes, so this is not much of a limitation.
 */
#define QSORT_MAX_RECS		(1ULL << 63)

int
xfarray_sort(
	struct xfarray		*array,
	xfarray_cmp_fn		cmp_fn,
	unsigned int		flags)
{
	struct xfarray_sortinfo	*si;
	xfarray_idx_t		*si_lo, *si_hi;
	void			*pivot;
	void			*scratch = xfarray_scratch(array);
	xfarray_idx_t		lo, hi;
	int			error = 0;

	if (array->nr < 2)
		return 0;
	if (array->nr >= QSORT_MAX_RECS)
		return -E2BIG;

	error = xfarray_sortinfo_alloc(array, cmp_fn, flags, &si);
	if (error)
		return error;
	si_lo = xfarray_sortinfo_lo(si);
	si_hi = xfarray_sortinfo_hi(si);
	pivot = xfarray_sortinfo_pivot(si);

	while (si->stack_depth >= 0) {
		lo = si_lo[si->stack_depth];
		hi = si_hi[si->stack_depth];

		trace_xfarray_qsort(si, lo, hi);

		/* Nothing left in this partition to sort; pop stack. */
		if (lo >= hi) {
			si->stack_depth--;
			continue;
		}

		/*
		 * If directly mapping the page and sorting can solve our
		 * problems, we're done.
		 */
		if (xfarray_want_pagesort(si, lo, hi)) {
			error = xfarray_pagesort(si, lo, hi);
			if (error)
				goto out_free;
			si->stack_depth--;
			continue;
		}

		/* If an in-memory sort can solve our problems, we're done. */
		if (xfarray_want_isort(si, lo, hi)) {
			error = xfarray_isort(si, lo, hi);
			if (error)
				goto out_free;
			si->stack_depth--;
			continue;
		}

		/* Pick a pivot, move it to a[lo] and stash it. */
		error = xfarray_qsort_pivot(si, lo, hi);
		if (error)
			goto out_free;

		/*
		 * Rearrange a[lo..hi] such that everything smaller than the
		 * pivot is on the left side of the range and everything larger
		 * than the pivot is on the right side of the range.
		 */
		while (lo < hi) {
			/*
			 * Decrement hi until it finds an a[hi] less than the
			 * pivot value.
			 */
			error = xfarray_sort_load_cached(si, hi, scratch);
			if (error)
				goto out_free;
			while (xfarray_sort_cmp(si, scratch, pivot) >= 0 &&
								lo < hi) {
				hi--;
				error = xfarray_sort_load_cached(si, hi,
						scratch);
				if (error)
					goto out_free;
			}
			error = xfarray_sort_put_page(si);
			if (error)
				goto out_free;

			if (xfarray_sort_terminated(si, &error))
				goto out_free;

			/* Copy that item (a[hi]) to a[lo]. */
			if (lo < hi) {
				error = xfarray_sort_store(si, lo++, scratch);
				if (error)
					goto out_free;
			}

			/*
			 * Increment lo until it finds an a[lo] greater than
			 * the pivot value.
			 */
			error = xfarray_sort_load_cached(si, lo, scratch);
			if (error)
				goto out_free;
			while (xfarray_sort_cmp(si, scratch, pivot) <= 0 &&
								lo < hi) {
				lo++;
				error = xfarray_sort_load_cached(si, lo,
						scratch);
				if (error)
					goto out_free;
			}
			error = xfarray_sort_put_page(si);
			if (error)
				goto out_free;

			if (xfarray_sort_terminated(si, &error))
				goto out_free;

			/* Copy that item (a[lo]) to a[hi]. */
			if (lo < hi) {
				error = xfarray_sort_store(si, hi--, scratch);
				if (error)
					goto out_free;
			}

			if (xfarray_sort_terminated(si, &error))
				goto out_free;
		}

		/*
		 * Put our pivot value in the correct place at a[lo]. All
		 * values between a[beg[i]] and a[lo - 1] should be less than
		 * the pivot; and all values between a[lo + 1] and a[end[i]-1]
		 * should be greater than the pivot.
		 */
		error = xfarray_sort_store(si, lo, pivot);
		if (error)
			goto out_free;

		/* Set up the stack frame to process the two partitions. */
		error = xfarray_qsort_push(si, si_lo, si_hi, lo, hi);
		if (error)
			goto out_free;

		if (xfarray_sort_terminated(si, &error))
			goto out_free;
	}

out_free:
	trace_xfarray_sort_stats(si, error);
	kvfree(si);
	return error;
}