From 0b3827f3e5ab7cd715da89c50d8fffb79b4191c7 Mon Sep 17 00:00:00 2001 From: Hayden Bilbo Date: Mon, 14 Oct 2024 23:01:24 -0500 Subject: [PATCH] Added script to rebalance hardlinked files. This one requires both directories as inputs. Modified original zfs-inplace-rebalancing.sh script to only process non hard-linked files. --- notes.txt | 41 ++++++++++++ testing-hardlinks.sh | 127 ++++++++++++++++++++++++++++++++++++ zfs-hardlink-rebalancing.sh | 104 ++++++++++++++--------------- zfs-inplace-rebalancing.sh | 75 ++++++++------------- 4 files changed, 243 insertions(+), 104 deletions(-) create mode 100644 notes.txt create mode 100755 testing-hardlinks.sh diff --git a/notes.txt b/notes.txt new file mode 100644 index 0000000..fc427b7 --- /dev/null +++ b/notes.txt @@ -0,0 +1,41 @@ +ZFS Rebalancing Script + +Modifications to original rebalancing script +1. Take two directory inputs to script call which contain the two locations of hardlinks. e.g. /data/torrents/movies and /data/media/movies + - These MUST be in the order /data/torrents/movies, then /data/media/movies +2. Loop through each subdirectory, if input is a file then store it with the torrent full path name as "sourcefile" +3. Function to check if the source file is a hardlink + +# Function to check if a file is a hard link +is_hardlink() { + local file="$1" + local link_count=$(stat -c '%h' "$file") + + # If the link count is greater than 1, it is a hard link + if [ "$link_count" -gt 1 ]; then + return 0 # True + else + return 1 # False + fi +} + +4. Continue on if so + +# Main script +sourcefile="torrent file path" + +if is_hardlink "$sourcefile"; then + # Proceed with further actions here +fi + +3. Get inode value of torrent file + `inode_val=$(ls -i $sourcefile | awk '{print $1}')` +4. Find files in media directory that match this inode + `filepath=$(find /data/media/movies/ -inum $inode_val)` +6. Remove the media hardlink + `rm "$filepath"` +7. Run rebalance process on torrent hardlink +8. 
Recreate the media hardlink + `ln "$sourcefile" "$filepath"` +9. Proceed to next file +10. ONLY PERFORM THIS PROCESS ON THE FIRST (e.g. /data/torrents/movies) DIRECTORY diff --git a/testing-hardlinks.sh b/testing-hardlinks.sh new file mode 100755 index 0000000..d60ec75 --- /dev/null +++ b/testing-hardlinks.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash + +# exit script on error +set -e +# exit on undeclared variable +set -u + +# file used to track processed files +rebalance_db_file_name="rebalance_db.txt" + +# index used for progress +current_index=0 + +## Color Constants + +# Reset +Color_Off='\033[0m' # Text Reset + +# Regular Colors +Red='\033[0;31m' # Red +Green='\033[0;32m' # Green +Yellow='\033[0;33m' # Yellow +Cyan='\033[0;36m' # Cyan + +## Functions + +# print a help message +function print_usage() { + echo "Usage: zfs-inplace-rebalancing --checksum true --passes 1 /data/source /data/dest" +} + +# print a given text entirely in a given color +function color_echo () { + color=$1 + text=$2 + echo -e "${color}${text}${Color_Off}" +} + + +function get_rebalance_count () { + file_path=$1 + + line_nr=$(grep -xF -n "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) + if [ -z "${line_nr}" ]; then + echo "0" + return + else + rebalance_count_line_nr="$((line_nr + 1))" + rebalance_count=$(awk "NR == ${rebalance_count_line_nr}" "./${rebalance_db_file_name}") + echo "${rebalance_count}" + return + fi +} + +# rebalance a specific file +function rebalance () { + file_path=$1 + hardlink_dir=$2 + hardlink_count=$(stat -c "%h" "${file_path}") + + echo "File path: $file_path" + + # Find other hardlinked file and remove it + inode_val=$(ls -i "$file_path" | awk '{print $1}') + hardlink_path=$(find "$hardlink_dir" -inum $inode_val) + + echo "inode Value: $inode_val" + echo "Hardlinked File: $hardlink_path" +} + +checksum_flag='true' +passes_flag='1' + +if [[ "$#" -ne 2 ]]; then + print_usage + exit 0 +fi + +while true ; do + case "$1" in + -h | --help ) + 
print_usage + exit 0 + ;; + -c | --checksum ) + if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then + checksum_flag="true" + else + checksum_flag="false" + fi + shift 2 + ;; + -p | --passes ) + passes_flag=$2 + shift 2 + ;; + *) + break + ;; + esac +done; + +source_path=$1 +dest_path=$2 + +color_echo "$Cyan" "Start rebalancing $(date):" +color_echo "$Cyan" " Rebalance Path: ${source_path}" +color_echo "$Cyan" " Hardlink Path: ${dest_path}" +color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}" +color_echo "$Cyan" " Use Checksum: ${checksum_flag}" + +# count number of hardlinked files +file_count=$(find "${source_path}" -type f -links 2 | wc -l) + +color_echo "$Cyan" " File count: ${file_count}" + +# create db file +if [ "${passes_flag}" -ge 1 ]; then + touch "./${rebalance_db_file_name}" +fi + +# recursively scan through files and execute "rebalance" procedure if the file is a hardlink +find "$source_path" -type f -links 2 -print0 | while IFS= read -r -d '' file; do rebalance "$file" "$dest_path"; done + +echo "" +echo "" +color_echo "$Green" "Done!" 
diff --git a/zfs-hardlink-rebalancing.sh b/zfs-hardlink-rebalancing.sh index b9fe6ce..d7564be 100755 --- a/zfs-hardlink-rebalancing.sh +++ b/zfs-hardlink-rebalancing.sh @@ -26,7 +26,7 @@ Cyan='\033[0;36m' # Cyan # print a help message function print_usage() { - echo "Usage: zfs-inplace-rebalancing --checksum true --skip-hardlinks false --passes 1 /my/pool" + echo "Usage: zfs-inplace-rebalancing --checksum true --passes 1 /data/source /data/dest" } # print a given text entirely in a given color @@ -55,37 +55,36 @@ function get_rebalance_count () { # rebalance a specific file function rebalance () { file_path=$1 + hardlink_dir=$2 - # check if file has >=2 links in the case of --skip-hardlinks - # this shouldn't be needed in the typical case of `find` only finding files with links == 1 + # check if file has exactly 2 links + # this shouldn't be needed in the typical case of `find` only finding files with links == 2 # but this can run for a long time, so it's good to double check if something changed - if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then - # Linux - # - # -c --format=FORMAT - # use the specified FORMAT instead of the default; output a - # newline after each use of FORMAT - # %h number of hard links - - hardlink_count=$(stat -c "%h" "${file_path}") - elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then - # Mac OS - # FreeBSD - # -f format - # Display information using the specified format - # l Number of hard links to file (st_nlink) - - hardlink_count=$(stat -f %l "${file_path}") - else - echo "Unsupported OS type: $OSTYPE" - exit 1 - fi - - if [ "${hardlink_count}" -ge 2 ]; then - echo "Skipping hard-linked file: ${file_path}" - return - fi + if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then + # Linux + # + # -c --format=FORMAT + # use the specified FORMAT instead of the default; output a + # newline after each use of FORMAT + # %h number of hard links + + hardlink_count=$(stat -c 
"%h" "${file_path}") + elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then + # Mac OS + # FreeBSD + # -f format + # Display information using the specified format + # l Number of hard links to file (st_nlink) + + hardlink_count=$(stat -f %l "${file_path}") + else + echo "Unsupported OS type: $OSTYPE" + exit 1 + fi + + if [ "${hardlink_count}" -ne 2 ]; then + echo "Skipping non hard-linked file: ${file_path}" + return fi current_index="$((current_index + 1))" @@ -108,6 +107,13 @@ function rebalance () { tmp_extension=".balance" tmp_file_path="${file_path}${tmp_extension}" + # Find other hardlinked file and remove it + inode_val=$(ls -i "${file_path}" | awk '{print $1}') + hardlink_path=$(find "${hardlink_dir}" -inum ${inode_val}) + echo "Removing hardlink '${hardlink_path}'..." + rm "${hardlink_path}" + + # Continue with rebalance echo "Copying '${file_path}' to '${tmp_file_path}'..." if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then # Linux @@ -190,6 +196,9 @@ function rebalance () { echo "Renaming temporary copy to original '${file_path}'..." mv "${tmp_file_path}" "${file_path}" + echo "Recreating deleted hardlink '${hardlink_path}'..." 
+ ln "$file_path" "$hardlink_path" + if [ "${passes_flag}" -ge 1 ]; then # update rebalance "database" line_nr=$(grep -xF -n "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) @@ -206,10 +215,9 @@ function rebalance () { } checksum_flag='true' -skip_hardlinks_flag='false' passes_flag='1' -if [[ "$#" -eq 0 ]]; then +if [[ "$#" -ne 2 ]]; then print_usage exit 0 fi @@ -228,14 +236,6 @@ while true ; do fi shift 2 ;; - --skip-hardlinks ) - if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then - skip_hardlinks_flag="true" - else - skip_hardlinks_flag="false" - fi - shift 2 - ;; -p | --passes ) passes_flag=$2 shift 2 @@ -246,20 +246,17 @@ while true ; do esac done; -root_path=$1 +source_path=$1 +dest_path=$2 color_echo "$Cyan" "Start rebalancing $(date):" -color_echo "$Cyan" " Path: ${root_path}" +color_echo "$Cyan" " Rebalance Path: ${source_path}" +color_echo "$Cyan" " Hardlink Path: ${dest_path}" color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}" color_echo "$Cyan" " Use Checksum: ${checksum_flag}" -color_echo "$Cyan" " Skip Hardlinks: ${skip_hardlinks_flag}" -# count files -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - file_count=$(find "${root_path}" -type f -links 1 | wc -l) -else - file_count=$(find "${root_path}" -type f | wc -l) -fi +# count number of hardlinked files +file_count=$(find "${source_path}" -type f -links 2 | wc -l) color_echo "$Cyan" " File count: ${file_count}" @@ -268,13 +265,8 @@ if [ "${passes_flag}" -ge 1 ]; then touch "./${rebalance_db_file_name}" fi -# recursively scan through files and execute "rebalance" procedure -# in the case of --skip-hardlinks, only find files with links == 1 -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -else - find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -fi +# recursively scan through files and execute "rebalance" procedure 
if the file is a hardlink +find "${source_path}" -type f -links 2 -print0 | while IFS= read -r -d '' file; do rebalance "${file}" "${dest_path}"; done echo "" echo "" diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh index b9fe6ce..c29d246 100755 --- a/zfs-inplace-rebalancing.sh +++ b/zfs-inplace-rebalancing.sh @@ -26,7 +26,7 @@ Cyan='\033[0;36m' # Cyan # print a help message function print_usage() { - echo "Usage: zfs-inplace-rebalancing --checksum true --skip-hardlinks false --passes 1 /my/pool" + echo "Usage: zfs-inplace-rebalancing --checksum true --passes 1 /my/pool" } # print a given text entirely in a given color @@ -56,36 +56,34 @@ function get_rebalance_count () { function rebalance () { file_path=$1 - # check if file has >=2 links in the case of --skip-hardlinks + # check if file has >=2 links # this shouldn't be needed in the typical case of `find` only finding files with links == 1 # but this can run for a long time, so it's good to double check if something changed - if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then - # Linux - # - # -c --format=FORMAT - # use the specified FORMAT instead of the default; output a - # newline after each use of FORMAT - # %h number of hard links + if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then + # Linux + # + # -c --format=FORMAT + # use the specified FORMAT instead of the default; output a + # newline after each use of FORMAT + # %h number of hard links - hardlink_count=$(stat -c "%h" "${file_path}") - elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then - # Mac OS - # FreeBSD - # -f format - # Display information using the specified format - # l Number of hard links to file (st_nlink) + hardlink_count=$(stat -c "%h" "${file_path}") + elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then + # Mac OS + # FreeBSD + # -f format + # Display information using the specified format + # l Number of hard links 
to file (st_nlink) - hardlink_count=$(stat -f %l "${file_path}") - else - echo "Unsupported OS type: $OSTYPE" - exit 1 - fi + hardlink_count=$(stat -f %l "${file_path}") + else + echo "Unsupported OS type: $OSTYPE" + exit 1 + fi - if [ "${hardlink_count}" -ge 2 ]; then - echo "Skipping hard-linked file: ${file_path}" - return - fi + if [ "${hardlink_count}" -ge 2 ]; then + echo "Skipping hard-linked file: ${file_path}" + return fi current_index="$((current_index + 1))" @@ -206,7 +204,6 @@ function rebalance () { } checksum_flag='true' -skip_hardlinks_flag='false' passes_flag='1' if [[ "$#" -eq 0 ]]; then @@ -228,14 +225,6 @@ while true ; do fi shift 2 ;; - --skip-hardlinks ) - if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then - skip_hardlinks_flag="true" - else - skip_hardlinks_flag="false" - fi - shift 2 - ;; -p | --passes ) passes_flag=$2 shift 2 @@ -252,14 +241,9 @@ color_echo "$Cyan" "Start rebalancing $(date):" color_echo "$Cyan" " Path: ${root_path}" color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}" color_echo "$Cyan" " Use Checksum: ${checksum_flag}" -color_echo "$Cyan" " Skip Hardlinks: ${skip_hardlinks_flag}" # count files -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - file_count=$(find "${root_path}" -type f -links 1 | wc -l) -else - file_count=$(find "${root_path}" -type f | wc -l) -fi +file_count=$(find "${root_path}" -type f -links 1 | wc -l) color_echo "$Cyan" " File count: ${file_count}" @@ -268,13 +252,8 @@ if [ "${passes_flag}" -ge 1 ]; then touch "./${rebalance_db_file_name}" fi -# recursively scan through files and execute "rebalance" procedure -# in the case of --skip-hardlinks, only find files with links == 1 -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -else - find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -fi +# recursively scan through files and execute "rebalance" 
procedure on files with links == 1 +find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done echo "" echo ""