
Searched hist:ace75066ced9b9abf432049699d0f9f911d8e496 (Results 1 – 3 of 3) sorted by relevance

/openbmc/linux/fs/btrfs/
send.c    diff ace75066ced9b9abf432049699d0f9f911d8e496 Wed Mar 31 05:56:21 CDT 2021 Filipe Manana <fdmanana@suse.com> btrfs: improve btree readahead for full send operations

Currently a full send operation uses the standard btree readahead when
iterating over the subvolume/snapshot btree. While that readahead brings
good performance benefits, it can be improved in a few aspects for use
cases such as full send operations, which are guaranteed to visit every
node and leaf of a btree in ascending, sequential order. The limitations
of the standard btree readahead implementation are the following:

1) It only triggers readahead for leaves that are physically close
to the leaf being read, within a 64K range;

2) It only triggers readahead for the next or previous leaves if the
leaf being read is not currently in memory;

3) It never triggers readahead for nodes.

So add a new readahead mode that addresses all these points and use it
for full send operations.
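
For illustration, a minimal sketch of how a caller could opt into such a
mode is shown below. The diff bodies are not part of this listing, so the
mode name READA_FORWARD_ALWAYS, the send_ctx field and the exact call
sites are assumptions inferred from the commit message rather than code
quoted from the commit:

    /* ctree.h (sketch): a readahead mode that always reads ahead in the
     * forward direction, for nodes as well as leaves, even when the
     * extent buffer currently being read is already in memory. */
    enum { READA_NONE, READA_BACK, READA_FORWARD, READA_FORWARD_ALWAYS };

    /* send.c (sketch): a full send has no parent root to diff against and
     * walks the whole tree in order, so the aggressive mode is safe to use. */
    struct btrfs_path *path = btrfs_alloc_path();
    if (!path)
        return -ENOMEM;
    if (!sctx->parent_root)
        path->reada = READA_FORWARD_ALWAYS;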

The following test script was used to measure the improvement on a box
using an average, consumer grade, spinning disk and with 16GiB of RAM:

$ cat test.sh
#!/bin/bash

DEV=/dev/sdj
MNT=/mnt/sdj
MKFS_OPTIONS="--nodesize 16384" # default, just to be explicit
MOUNT_OPTIONS="-o max_inline=2048" # default, just to be explicit

mkfs.btrfs -f $MKFS_OPTIONS $DEV > /dev/null
mount $MOUNT_OPTIONS $DEV $MNT

# Create files with inline data to make it easier and faster to create
# large btrees.
add_files()
{
    local total=$1
    local start_offset=$2
    local number_jobs=$3
    local total_per_job=$(($total / $number_jobs))

    echo "Creating $total new files using $number_jobs jobs"
    for ((n = 0; n < $number_jobs; n++)); do
        (
            local start_num=$(($start_offset + $n * $total_per_job))
            for ((i = 1; i <= $total_per_job; i++)); do
                local file_num=$((start_num + $i))
                local file_path="$MNT/file_${file_num}"
                xfs_io -f -c "pwrite -S 0xab 0 2000" $file_path > /dev/null
                if [ $? -ne 0 ]; then
                    echo "Failed creating file $file_path"
                    break
                fi
            done
        ) &
        worker_pids[$n]=$!
    done

    wait ${worker_pids[@]}

    sync
    echo
    echo "btree node/leaf count: $(btrfs inspect-internal dump-tree -t 5 $DEV | egrep '^(node|leaf) ' | wc -l)"
}

initial_file_count=500000
add_files $initial_file_count 0 4

echo
echo "Creating first snapshot..."
btrfs subvolume snapshot -r $MNT $MNT/snap1

echo
echo "Adding more files..."
add_files $((initial_file_count / 4)) $initial_file_count 4

echo
echo "Updating 1/50th of the initial files..."
for ((i = 1; i < $initial_file_count; i += 50)); do
    xfs_io -c "pwrite -S 0xcd 0 20" $MNT/file_$i > /dev/null
done

echo
echo "Creating second snapshot..."
btrfs subvolume snapshot -r $MNT $MNT/snap2

umount $MNT

echo 3 > /proc/sys/vm/drop_caches
blockdev --flushbufs $DEV &> /dev/null
hdparm -F $DEV &> /dev/null

mount $MOUNT_OPTIONS $DEV $MNT

echo
echo "Testing full send..."
start=$(date +%s)
btrfs send $MNT/snap1 > /dev/null
end=$(date +%s)
echo
echo "Full send took $((end - start)) seconds"

umount $MNT

The durations of the full send operation in seconds were the following:

Before this change: 217 seconds
After this change: 205 seconds (-5.5%)

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
ctree.c    diff ace75066ced9b9abf432049699d0f9f911d8e496 Wed Mar 31 05:56:21 CDT 2021 Filipe Manana <fdmanana@suse.com> btrfs: improve btree readahead for full send operations

(commit message identical to the send.c entry above)
ctree.h    diff ace75066ced9b9abf432049699d0f9f911d8e496 Wed Mar 31 05:56:21 CDT 2021 Filipe Manana <fdmanana@suse.com> btrfs: improve btree readahead for full send operations

(commit message identical to the send.c entry above)