diff --git a/README.md b/README.md
index 533ce67..6fe5856 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Simple bash script to rebalance pool data between all mirrors when adding vdevs
## How it works
-This script recursively traverses all the files in a given directory. Each file is copied with a `.rebalance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive.
+This script recursively traverses all the files in a given directory. Each file is copied with a `.balance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive.
The way ZFS distributes writes is not trivial, which makes it hard to predict how effective the redistribution will be. See:
- https://jrs-s.net/2018/04/11/zfs-allocates-writes-according-to-free-space-per-vdev-not-latency-per-vdev/
@@ -100,6 +100,7 @@ You can print a help message by running the script without any parameters:
|-----------|-------------|---------|
| `-c`
`--checksum` | Whether to compare attributes and content of the copied file using an **MD5** checksum. Technically this is a redundent check and consumes a lot of resources, so think twice. | `true` |
| `-p`
`--passes` | The maximum number of rebalance passes per file. Setting this to infinity by using a value `<= 0` might improve performance when rebalancing a lot of small files. | `1` |
+| `--skip-hardlinks` | Skip hardlinked files (files with more than one link) instead of rebalancing them, since rebalancing them would only create duplicate data. | `false` |
### Example
@@ -133,7 +134,7 @@ tail -F ./stdout.log
Although this script **does** have a progress output (files as well as percentage) it might be a good idea to try a small subfolder first, or process your pool folder layout in manually selected badges. This can also limit the damage done, if anything bad happens.
-When aborting the script midway through, be sure to check the last lines of its output. When cancelling before or during the renaming process a ".rebalance" file might be left and you have to rename (or delete) it manually.
+When aborting the script midway through, be sure to check the last lines of its output. When cancelling before or during the renaming process, a ".balance" file might be left behind and you have to rename (or delete) it manually.
Although the `--passes` parameter can be used to limit the maximum amount of rebalance passes per file, it is only meant to speedup aborted runs. Individual files will **not be process multiple times automatically**. To reach multiple passes you have to run the script on the same target directory multiple times.
diff --git a/testing.sh b/testing.sh
index 3c92f2b..b46f74f 100755
--- a/testing.sh
+++ b/testing.sh
@@ -30,6 +30,20 @@ function assertions() {
fi
}
+function assert_matching_file_copied() { # pass only if a "Copying" line in $log_std_file contains the literal text $1
+ if ! grep "Copying" "$log_std_file" | grep -qF -- "$1"; then # -F: no regex, but still a substring match ("mp4.txt" also hits "mp4.txt.link")
+ echo "File matching '$1' was not copied when it should have been!"
+ exit 1 # abort the test script on the first failed assertion
+ fi
+}
+
+function assert_matching_file_not_copied() { # pass only if no "Copying" line in $log_std_file contains the literal text $1
+ if grep "Copying" "$log_std_file" | grep -qF -- "$1"; then # -F/--: $1 is plain text, never a regex or a grep option
+ echo "File matching '$1' was copied when it should have been skipped!"
+ exit 1 # abort the test script on the first failed assertion
+ fi
+}
+
prepare
./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
@@ -44,3 +58,21 @@ prepare
./zfs-inplace-rebalancing.sh --checksum false $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
assertions
+
+prepare
+ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
+./zfs-inplace-rebalancing.sh --skip-hardlinks false "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
+cat "$log_std_file"
+# Both link files should be copied
+assert_matching_file_copied "mp4.txt" # NOTE: substring match, so the "mp4.txt.link" log line alone would also satisfy this
+assert_matching_file_copied "mp4.txt.link"
+assertions
+
+prepare
+ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
+./zfs-inplace-rebalancing.sh --skip-hardlinks true "$test_pool_data_path" >> "$log_std_file" 2>> "$log_error_file"
+cat "$log_std_file"
+# Neither file should be copied now, since they are each a hardlink
+assert_matching_file_not_copied "mp4.txt.link" # NOTE(review): the log is appended to (>>), so this presumably relies on `prepare` resetting it - confirm
+assert_matching_file_not_copied "mp4.txt"
+assertions
diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh
index 05679db..66b8224 100755
--- a/zfs-inplace-rebalancing.sh
+++ b/zfs-inplace-rebalancing.sh
@@ -26,7 +26,7 @@ Cyan='\033[0;36m' # Cyan
# print a help message
function print_usage() {
- echo "Usage: zfs-inplace-rebalancing --checksum true --passes 1 /my/pool"
+ echo "Usage: zfs-inplace-rebalancing --checksum true --skip-hardlinks false --passes 1 /my/pool" # example invocation; the case statement in the argument parser defines what is actually accepted
}
# print a given text entirely in a given color
@@ -56,6 +56,18 @@ function get_rebalance_count () {
function rebalance () {
file_path=$1
+ # check if file has >=2 links in the case of --skip-hardlinks
+ # this shouldn't be needed in the typical case of `find` only finding files with links == 1
+ # but this can run for a long time, so it's good to double check if something changed
+ if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
+ hardlink_count=$(stat -c "%h" "${file_path}" 2>/dev/null || stat -f "%l" "${file_path}" 2>/dev/null) # GNU stat syntax first, BSD stat syntax as fallback
+
+ if [[ "${hardlink_count}" =~ ^[0-9]+$ ]] && [ "${hardlink_count}" -ge 2 ]; then # empty or non-numeric stat output is ignored instead of breaking the comparison
+ echo "Skipping hard-linked file: ${file_path}"
+ return
+ fi
+ fi
+
current_index="$((current_index + 1))"
progress_percent=$(echo "scale=2; ${current_index}*100/${file_count}" | bc)
color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)"
@@ -175,15 +187,20 @@ function rebalance () {
}
checksum_flag='true'
+skip_hardlinks_flag='false' # default: hardlinked files are processed like any other file
passes_flag='1'
-if [ "$#" -eq 0 ]; then
+if [[ "$#" -eq 0 ]]; then # no arguments at all: show usage and exit successfully
print_usage
exit 0
fi
while true ; do
case "$1" in
+ -h | --help )
+ print_usage
+ exit 0
+ ;;
-c | --checksum )
if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then
checksum_flag="true"
@@ -192,6 +209,14 @@ while true ; do
fi
shift 2
;;
+ --skip-hardlinks ) # takes one value: 1/on/true/yes enable skipping, anything else disables it
+ if [[ "$2" == 1 || "$2" =~ ^(on|true|yes)$ ]]; then # anchored: only the exact words enable the flag (unanchored, "none" would match "on")
+ skip_hardlinks_flag="true"
+ else
+ skip_hardlinks_flag="false"
+ fi
+ shift 2 # consume the option and its value
+ ;;
-p | --passes )
passes_flag=$2
shift 2
@@ -208,9 +233,15 @@ color_echo "$Cyan" "Start rebalancing:"
color_echo "$Cyan" " Path: ${root_path}"
color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}"
color_echo "$Cyan" " Use Checksum: ${checksum_flag}"
+color_echo "$Cyan" " Skip Hardlinks: ${skip_hardlinks_flag}"
# count files
-file_count=$(find "${root_path}" -type f | wc -l)
+if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
+ file_count=$(find "${root_path}" -type f -links 1 -print0 | LC_ALL=C tr -cd '\000' | wc -c) # count NUL terminators: one per file, even if a name contains a newline
+else
+ file_count=$(find "${root_path}" -type f -print0 | LC_ALL=C tr -cd '\000' | wc -c) # same count without the link-count filter
+fi
+
color_echo "$Cyan" " File count: ${file_count}"
# create db file
@@ -219,7 +250,13 @@ if [ "${passes_flag}" -ge 1 ]; then
fi
# recursively scan through files and execute "rebalance" procedure
-find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done
+# in the case of --skip-hardlinks, only find files with links == 1
+if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
+ find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done # NUL-delimited so paths with spaces or newlines reach rebalance intact
+else
+ find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done # same traversal without the link-count filter
+fi
+
echo ""
echo ""
color_echo "$Green" "Done!"