zfs-inplace-rebalancing/testing.sh
Colin Hebert b788358600
Support Hardlinks (Any number) - Update (#70)
* Support hardlink groups, add debug

-Adds debug functionality with extended details
-Supports detecting inode groups for hardlink processing.
-Pulls files, sorts them by inode, then groups them into inode groups with awk
-Checks all files in an inode group's counts when calculating skipping counts
-Removes existing skip hardlink flag
-Removes hardlinks and recreates them directly after the balance copy/delete/move operation per inode group to minimize 'downtime'

* Update README.md to denote hardlink support

Adds details around the debug flag, hardlink support, removed --skip-hardlinks functionality, and temporary files used during the script processing.

* Add additional hardlink group database notes

* typo

* Flip default debug

* Fix space handling in paths

* Fix echo bug

Removed the 'recreating hardlinks' echo for inode groups of 1 file.

* Introduce echo_debug

* Reintroduce cp documentation

* Remove unnecessary -e flag

* Remove unused flag

* Fix broken test due to | in the filename

* Add ignore on temporary inode files

* Update tests to assert the hardlinks are still working

* Fix ShellCheck issues

* Fix pasting issue

* Make grep compatible with BSD/MacOS

* Print whole line in debug

* Fix stat for BSD

* Fix stat for BSD part 2

* Fix shellcheck entry

* Fix stats working with FreeBSD

* Use bash's double brackets for consistency's sake

* Expand to a string

---------

Co-authored-by: undaunt <31376520+undaunt@users.noreply.github.com>
2025-04-15 21:12:22 +02:00

183 lines
4.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# Abort on the first failing command (-e) and on any use of an unset
# variable (-u).
set -eu

# Log files that capture stdout / stderr of the script under test
log_std_file=./test.log
log_error_file=./error.log

# Fixture tree, the scratch copy the tests run against, and the sub-directory
# used for the generated size/count test files
test_data_src=./test/pool
test_pool_data_path=./testing_data
test_pool_data_size_path="${test_pool_data_path}/size"

## Color Constants
# ANSI escape sequences stored with literal backslashes; they are turned into
# real escape characters when printed.
Color_Off='\033[0m' # Text Reset
Red='\033[0;31m'    # Red
Green='\033[0;32m'  # Green
Yellow='\033[0;33m' # Yellow
Cyan='\033[0;36m'   # Cyan

# Lower-cased $OSTYPE, used to pick platform-specific command variants
OSName=$(tr '[:upper:]' '[:lower:]' <<< "$OSTYPE")
## Functions
# Print a given text entirely in a given color, then reset the color.
# Globals:   Color_Off (read) - escape sequence that resets text attributes
# Arguments: $1 - color escape sequence (e.g. "$Red")
#            $2 - message text, printed verbatim
# Outputs:   the colored message followed by a newline on stdout
function color_echo () {
  local color=$1
  local text=$2
  # %b expands the backslash escapes of the color codes only; %s keeps the
  # message literal so a backslash in the text is not misread as an escape.
  printf '%b%s%b\n' "$color" "$text" "$Color_Off"
}
# Reset the working directory to a clean state and stage a fresh copy of the
# test data.
# Globals (read): log_std_file, log_error_file, test_data_src,
#                 test_pool_data_path
# Side effects:   deletes both log files, rebalance_db.txt and the previous
#                 scratch directory, then recursively copies test_data_src to
#                 test_pool_data_path.
function prepare() {
  # cleanup
  rm -f -- "$log_std_file" "$log_error_file" rebalance_db.txt
  # ${var:?} aborts instead of running `rm -rf` with an empty path
  rm -rf -- "${test_pool_data_path:?}"
  # setup
  cp -rf -- "$test_data_src" "$test_pool_data_path"
}
# Print the current time as seconds since the epoch immediately followed by
# the nanosecond field (format %s%N), i.e. a nanosecond-resolution timestamp.
# Callers divide differences by 1000000 to obtain milliseconds.
# Globals:  OSName (read) - lower-cased OS identifier
# Outputs:  the timestamp on stdout
# Returns:  non-zero, printing nothing, when the date command fails
function get_time() {
  local now
  if [[ "${OSName}" == "darwin"* ]]; then
    # macOS uses gdate; presumably the stock date lacks %N there and gdate
    # comes from GNU coreutils - confirm before relying on it.
    now=$(gdate +%s%N) || return 1
  else
    now=$(date +%s%N) || return 1
  fi
  echo "$now"
}
# Print the inode number of a file.
# Globals:   OSName (read) - lower-cased OS identifier
# Arguments: $1 - path of the file to inspect
# Outputs:   the inode number on stdout
# Returns:   non-zero, printing nothing, when stat fails (e.g. missing file)
function get_inode() {
  local inode
  case "${OSName}" in
    darwin* | freebsd*)
      # stat on macOS / FreeBSD takes its format string through -f
      inode=$(stat -f "%i" "$1") || return 1
      ;;
    *)
      # elsewhere stat takes its format string through -c
      inode=$(stat -c "%i" "$1") || return 1
      ;;
  esac
  echo "$inode"
}
# Fail the test run when the error log contains any output.
# Globals: log_error_file (read), Red (read)
# Exits the script with status 1, after printing the log, when the error log
# holds at least one non-whitespace character. A log that grep cannot read
# (e.g. it does not exist) is treated the same as an empty one.
function assertions() {
  # check error log is empty (whitespace-only content counts as empty);
  # the path is quoted so a log location containing spaces is still checked
  if grep -q '[^[:space:]]' -- "$log_error_file"; then
    color_echo "$Red" "error log is not empty!"
    cat -- "$log_error_file"
    exit 1
  fi
}
# Assert that two paths refer to the same inode, i.e. are hardlinks of the
# same file.
# Arguments: $1 - first path
#            $2 - second path
# Exits the script with status 1, after printing a message, when the inodes
# differ or when either inode could not be determined.
function assert_matching_file_hardlinked() {
  local first_inode second_inode
  # `|| ...=""` keeps a failed lookup from aborting under `set -e` before the
  # message below is printed
  first_inode=$(get_inode "$1") || first_inode=""
  second_inode=$(get_inode "$2") || second_inode=""
  # An empty inode means the lookup failed; two failed lookups must not be
  # mistaken for a matching pair.
  if [[ -z "$first_inode" || -z "$second_inode" || "$first_inode" != "$second_inode" ]]; then
    echo "File '$1' was not hardlinked to '$2' when it should have been!"
    exit 1
  fi
}
# Print a duration split into minutes, seconds and milliseconds.
# Globals:   Yellow (read)
# Arguments: $1 - elapsed time in milliseconds (integer)
# Outputs:   "Time taken: <m>m <s>s <ms>ms" via color_echo
function print_time_taken(){
  # locals keep the helper from overwriting the caller's time_taken variable
  local elapsed_ms=$1
  local minutes=$((elapsed_ms / 60000))
  local seconds=$((elapsed_ms % 60000 / 1000))
  local milliseconds=$((elapsed_ms % 1000))
  color_echo "$Yellow" "Time taken: ${minutes}m ${seconds}s ${milliseconds}ms"
}
color_echo "$Cyan" "Running tests..."

# Each scenario below follows the same pattern: reset the test data, run the
# rebalancing script with stdout and stderr appended to separate logs, show the
# stdout log, then let `assertions` fail the run if the error log has content.
# All path expansions are quoted so they survive spaces or glob characters.

# Scenario: default options
color_echo "$Cyan" "Running tests with default options..."
prepare
./zfs-inplace-rebalancing.sh "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
cat "$log_std_file"
assertions
color_echo "$Green" "Tests passed!"

# Scenario: checksum enabled, single pass
color_echo "$Cyan" "Running tests with checksum true and 1 pass..."
prepare
./zfs-inplace-rebalancing.sh --checksum true --passes 1 "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
cat "$log_std_file"
assertions
color_echo "$Green" "Tests passed!"

# Scenario: checksum disabled
color_echo "$Cyan" "Running tests with checksum false..."
prepare
./zfs-inplace-rebalancing.sh --checksum false "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
cat "$log_std_file"
assertions
color_echo "$Green" "Tests passed!"
# Scenario: a file that has a hardlink inside the tree being rebalanced
color_echo "$Cyan" "Running tests with hardlinks..."
prepare
# Create a second directory entry for mp4.txt before the script runs
ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
./zfs-inplace-rebalancing.sh "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
cat "$log_std_file"
# Both link files should be copied, and afterwards they must still share one
# inode
assert_matching_file_hardlinked "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
assertions
color_echo "$Green" "Tests passed!"
# Scenario setup: generate random files of very different sizes
color_echo "$Cyan" "Running tests with different file count and size..."
prepare
mkdir -p "$test_pool_data_size_path"

color_echo "$Cyan" "Creating 1000 files of 1KB each..."
mkdir -p "$test_pool_data_size_path/small"
for i in {1..1000}; do
  # one 1024-byte block of random data per file; dd's own output is discarded
  dd if=/dev/urandom of="$test_pool_data_size_path/small/file_${i}.txt" bs=1024 count=1 >> /dev/null 2>&1
done

color_echo "$Cyan" "Creating 5 file of 1GB each..."
mkdir -p "$test_pool_data_size_path/big"
for i in {1..5}; do
  # 1048576 blocks of 1024 bytes = 1 GiB of random data per file
  dd if=/dev/urandom of="$test_pool_data_size_path/big/file_${i}.txt" bs=1024 count=1048576 >> /dev/null 2>&1
done
color_echo "$Green" "Files created!"
# Timed runs. get_time prints a %s%N timestamp (nanosecond resolution), so the
# difference is divided by 1000000 to get the milliseconds that
# print_time_taken formats.

echo "Running rebalancing on small files..."
# measure time taken
start_time=$(get_time)
./zfs-inplace-rebalancing.sh "$test_pool_data_size_path/small" >> "$log_std_file" 2>> "$log_error_file"
end_time=$(get_time)
time_taken=$(( (end_time - start_time) / 1000000 ))
print_time_taken "$time_taken"
assertions
color_echo "$Green" "Tests passed!"

echo "Running rebalancing on big files..."
# remove rebalance_db.txt left behind by the previous run
rm -f rebalance_db.txt
# measure time taken
start_time=$(get_time)
./zfs-inplace-rebalancing.sh "$test_pool_data_size_path/big" >> "$log_std_file" 2>> "$log_error_file"
end_time=$(get_time)
time_taken=$(( (end_time - start_time) / 1000000 ))
print_time_taken "$time_taken"
assertions
color_echo "$Green" "Tests passed!"

echo "Running rebalancing on all files..."
# remove rebalance_db.txt left behind by the previous run
rm -f rebalance_db.txt
# measure time taken
start_time=$(get_time)
./zfs-inplace-rebalancing.sh "$test_pool_data_size_path" >> "$log_std_file" 2>> "$log_error_file"
end_time=$(get_time)
time_taken=$(( (end_time - start_time) / 1000000 ))
print_time_taken "$time_taken"
assertions
color_echo "$Green" "Tests passed!"
color_echo "$Green" "All tests passed!"

# Remove everything the test run created: both logs, rebalance_db.txt and the
# scratch copy of the test data.
color_echo "$Cyan" "Cleaning"
rm -f -- "$log_std_file"
rm -f -- "$log_error_file"
rm -f rebalance_db.txt
# ${var:?} aborts instead of running `rm -rf` with an empty path
rm -rf -- "${test_pool_data_path:?}"