Compare commits

..

1 Commit

Author: Markus Ressel
SHA1: 4ad46ae83a
Message: -F, --fixed-strings PATTERNS are strings; -x, --line-regexp match only whole lines
Date: 2022-03-29 03:40:06 +02:00
14 changed files with 17 additions and 306 deletions


@@ -1,9 +0,0 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: "/"
schedule:
# Check for updates to GitHub Actions every week
interval: "weekly"

.github/stale.yml

@@ -1,59 +0,0 @@
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 60
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 14
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- pinned
- security
- bug
- enhancement
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: true
# Label to use when marking as stale
staleLabel: wontfix
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
closeComment: >
There has been no incentive by contributors or maintainers to revive this stale issue and it will now be closed.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
only: issues
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
# pulls:
# daysUntilStale: 30
# markComment: >
# This pull request has been automatically marked as stale because it has not had
# recent activity. It will be closed if no further activity occurs. Thank you
# for your contributions.
# issues:
# exemptLabels:
# - confirmed


@@ -1,44 +0,0 @@
name: Docker latest
on:
push:
branches: [ master ]
env:
REGISTRY: ghcr.io
jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log into registry ${{ env.REGISTRY }}
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ github.repository }}
tags: |
type=raw,value=latest
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}


@@ -15,6 +15,6 @@ jobs:
name: Shellcheck
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v2
- name: Run ShellCheck
uses: ludeeus/action-shellcheck@master
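For reference, the same check this workflow runs can be reproduced locally with the ShellCheck CLI (assuming it is installed):
```shell
# lint the rebalancing script locally, mirroring the CI step above
shellcheck zfs-inplace-rebalancing.sh
```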


@@ -1,19 +0,0 @@
# Test
name: Test
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
shellcheck:
name: Test
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run testing script
run: ./testing.sh

.gitignore

@@ -1,4 +1 @@
test.log
error.log
rebalance_db.txt
testing_data
rebalance_db.txt


@@ -1,10 +0,0 @@
FROM phusion/baseimage:jammy-1.0.1
MAINTAINER markusressel
RUN apt-get update \
&& apt-get -y install bc \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
COPY zfs-inplace-rebalancing.sh ./
ENTRYPOINT ["./zfs-inplace-rebalancing.sh"]


@@ -5,11 +5,7 @@ Simple bash script to rebalance pool data between all mirrors when adding vdevs
## How it works
This script recursively traverses all the files in a given directory. Each file is copied with a `.balance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive.
The way ZFS distributes writes is not trivial, which makes it hard to predict how effective the redistribution will be. See:
- https://jrs-s.net/2018/04/11/zfs-allocates-writes-according-to-free-space-per-vdev-not-latency-per-vdev/
- https://jrs-s.net/2018/08/24/zfs-write-allocation-in-0-7-x/
This script recursively traverses all the files in a given directory. Each file is copied with a `.rebalance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive.
Note that this process is not entirely "in-place", since a file has to be fully copied before the original is deleted. The term is used to make it clear that no additional pool (and therefore hardware) is necessary to use this script. However, this also means that you have to have enough space to create a copy of the biggest file in your target directory for it to work.
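As a rough sketch, the per-file operation described above amounts to something like the following (paths are illustrative; the real script adds existence checks, optional checksumming, a pass counter and OS-specific copy flags):
```shell
# copy to a temporary name, preserving attributes (--reflink=never forces a real copy on Linux)
cp --reflink=never -adxp "/pool/data/somefile" "/pool/data/somefile.balance"
# remove the original and rename the copy back to the original name
rm "/pool/data/somefile"
mv "/pool/data/somefile.balance" "/pool/data/somefile"
```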
@@ -70,7 +66,7 @@ Due to the working principle of this script, it is crucial that you **only run i
### Snapshots
If you do a snapshot of the data you want to balance before starting the rebalancing script, keep in mind that ZFS now has to keep track of all of the data in the target directory twice. Once in the snapshot you made, and once for the new copy. This means that you will effectively use double the file size of all files within the target directory. Therefore it is a good idea to process the pool data in batches and remove old snapshots along the way, since you probably will be hitting the capacity limits of your pool at some point during the rebalancing process.
If you do a snapshot of the data you want to balance before starting the rebalancing script, keep in mind that ZFS now has to keep track of all of the data in the target directory twice. Once in the snapshot you made, and once for the new copy. This means that you will effectively use double the file size of all files within the target directory. Therefore it is a good idea to process the pool data in badges and remove old snapshots along the way, since you probably will be hitting the capacity limits of your pool at some point during the rebalancing process.
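For example, after finishing one batch you could remove the snapshot you took for it before moving on to the next batch (dataset and snapshot names are placeholders):
```shell
# free the space held by the pre-rebalance snapshot of the finished batch
zfs destroy tank/data@before-rebalance-batch1
```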
## Installation
@@ -82,7 +78,7 @@ chmod +x ./zfs-inplace-rebalancing.sh
```
Dependencies:
* `perl` - it should be available on most systems by default
* `pacman -S bc` - used for percentage calculation
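The `bc` dependency is only used to compute the progress percentage, roughly along these lines (numbers are illustrative):
```shell
# 42 of 1337 files processed -> prints 3.14
echo "scale=2; 42*100/1337" | bc
```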
## Usage
@@ -90,7 +86,7 @@ Dependencies:
You can print a help message by running the script without any parameters:
```shell
```
./zfs-inplace-rebalancing.sh
```
@@ -100,52 +96,31 @@ You can print a help message by running the script without any parameters:
|-----------|-------------|---------|
| `-c`<br>`--checksum` | Whether to compare attributes and content of the copied file using an **MD5** checksum. Technically this is a redundant check and consumes a lot of resources, so think twice. | `true` |
| `-p`<br>`--passes` | The maximum number of rebalance passes per file. Setting this to infinity by using a value `<= 0` might improve performance when rebalancing a lot of small files. | `1` |
| `--skip-hardlinks` | Skip rebalancing hardlinked files, since it will only create duplicate data. | `false` |
### Example
Make sure to run this script with a user that has rw permission to all of the files in the target directory.
The easiest way to achieve this is by **running the script as root**.
```shell
```
sudo su
./zfs-inplace-rebalancing.sh --checksum true --passes 1 /pool/path/to/rebalance
```
To keep track of the balancing progress, you can open another terminal and run:
```shell
watch zpool list -v
```
### Log to File
To write the output to a file, simply redirect stdout and stderr to a file (or separate files).
Since this redirects all output, you will have to follow the contents of the log files to get realtime info:
```shell
# one shell window:
tail -F ./stdout.log
# another shell window:
./zfs-inplace-rebalancing.sh /pool/path/to/rebalance >> ./stdout.log 2>> ./stderr.log
watch zpool list -v
```
### Things to consider
Although this script **does** have a progress output (files as well as percentage), it might be a good idea to try a small subfolder first, or process your pool folder layout in manually selected batches. This can also limit the damage done, if anything bad happens.
When aborting the script midway through, be sure to check the last lines of its output. When cancelling before or during the renaming process a ".balance" file might be left and you have to rename (or delete) it manually.
When aborting the script midway through, be sure to check the last lines of its output. When cancelling before or during the renaming process a ".rebalance" file might be left and you have to rename (or delete) it manually.
Although the `--passes` parameter can be used to limit the maximum number of rebalance passes per file, it is only meant to speed up aborted runs. Individual files will **not be processed multiple times automatically**. To reach multiple passes you have to run the script on the same target directory multiple times.
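For example, assuming the pass counter in `rebalance_db.txt` works as described, two effective passes could be achieved by invoking the script twice with the same limit (path is illustrative):
```shell
./zfs-inplace-rebalancing.sh --passes 2 /pool/path/to/rebalance  # first run: pass 1
./zfs-inplace-rebalancing.sh --passes 2 /pool/path/to/rebalance  # second run: pass 2
```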
### Dockerfile
To increase portability, this script can also be run using docker:
```shell
sudo docker run --rm -it -v /your/data:/data ghcr.io/markusressel/zfs-inplace-rebalancing:latest ./data
```
# Contributing
GitHub is for social coding: if you want to write code, I encourage contributions through pull requests from forks


@@ -1 +0,0 @@
test


@@ -1 +0,0 @@
test


@@ -1 +0,0 @@
test


@@ -1,78 +0,0 @@
#!/usr/bin/env bash
# exit script on error
set -e
# exit on undeclared variable
set -u
log_std_file=./test.log
log_error_file=./error.log
test_data_src=./test/pool
test_pool_data_path=./testing_data
function prepare() {
# cleanup
rm -f $log_std_file
rm -f $log_error_file
rm -f rebalance_db.txt
rm -rf $test_pool_data_path
# setup
cp -rf $test_data_src $test_pool_data_path
}
function assertions() {
# check error log is empty
if grep -q '[^[:space:]]' $log_error_file; then
echo "error log is not empty!"
cat $log_error_file
exit 1
fi
}
function assert_matching_file_copied() {
if ! grep "Copying" $log_std_file | grep -q "$1"; then
echo "File matching '$1' was not copied when it should have been!"
exit 1
fi
}
function assert_matching_file_not_copied() {
if grep "Copying" $log_std_file | grep -q "$1"; then
echo "File matching '$1' was copied when it should have been skipped!"
exit 1
fi
}
prepare
./zfs-inplace-rebalancing.sh $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
assertions
prepare
./zfs-inplace-rebalancing.sh --checksum true --passes 1 $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
assertions
prepare
./zfs-inplace-rebalancing.sh --checksum false $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
assertions
prepare
ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
./zfs-inplace-rebalancing.sh --skip-hardlinks false $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
# Both link files should be copied
assert_matching_file_copied "mp4.txt"
assert_matching_file_copied "mp4.txt.link"
assertions
prepare
ln "$test_pool_data_path/projects/[2020] some project/mp4.txt" "$test_pool_data_path/projects/[2020] some project/mp4.txt.link"
./zfs-inplace-rebalancing.sh --skip-hardlinks true $test_pool_data_path >> $log_std_file 2>> $log_error_file
cat $log_std_file
# Neither file should be copied now, since they are each a hardlink
assert_matching_file_not_copied "mp4.txt.link"
assert_matching_file_not_copied "mp4.txt"
assertions


@@ -26,7 +26,7 @@ Cyan='\033[0;36m' # Cyan
# print a help message
function print_usage() {
echo "Usage: zfs-inplace-rebalancing --checksum true --skip-hardlinks false --passes 1 /my/pool"
echo "Usage: zfs-inplace-rebalancing -checksum true -passes 1 /my/pool"
}
# print a given text entirely in a given color
@@ -56,20 +56,8 @@ function get_rebalance_count () {
function rebalance () {
file_path=$1
# check if file has >=2 links in the case of --skip-hardlinks
# this shouldn't be needed in the typical case of `find` only finding files with links == 1
# but this can run for a long time, so it's good to double check if something changed
if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
hardlink_count=$(stat -c "%h" "${file_path}")
if [ "${hardlink_count}" -ge 2 ]; then
echo "Skipping hard-linked file: ${file_path}"
return
fi
fi
current_index="$((current_index + 1))"
progress_percent=$(perl -e "printf('%0.2f', ${current_index}*100/${file_count})")
progress_percent=$(echo "scale=2; ${current_index}*100/${file_count}" | bc)
color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)"
if [[ ! -f "${file_path}" ]]; then
@@ -92,12 +80,11 @@ function rebalance () {
if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then
# Linux
# --reflink=never -- force standard copy (see ZFS Block Cloning)
# -a -- keep attributes
# -d -- keep symlinks (dont copy target)
# -x -- stay on one system
# -p -- preserve ACLs too
cp --reflink=never -adxp "${file_path}" "${tmp_file_path}"
cp -adxp "${file_path}" "${tmp_file_path}"
elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then
# Mac OS
# FreeBSD
@@ -188,36 +175,23 @@ function rebalance () {
}
checksum_flag='true'
skip_hardlinks_flag='false'
passes_flag='1'
if [[ "$#" -eq 0 ]]; then
if [ "$#" -eq 0 ]; then
print_usage
exit 0
fi
while true ; do
case "$1" in
-h | --help )
print_usage
exit 0
;;
-c | --checksum )
if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then
if [ "$2" -eq 1 ] || [[ "$2" =~ (on|true|yes) ]]; then
checksum_flag="true"
else
checksum_flag="false"
fi
shift 2
;;
--skip-hardlinks )
if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then
skip_hardlinks_flag="true"
else
skip_hardlinks_flag="false"
fi
shift 2
;;
-p | --passes )
passes_flag=$2
shift 2
@@ -230,19 +204,13 @@ done;
root_path=$1
color_echo "$Cyan" "Start rebalancing $(date):"
color_echo "$Cyan" "Start rebalancing:"
color_echo "$Cyan" " Path: ${root_path}"
color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}"
color_echo "$Cyan" " Use Checksum: ${checksum_flag}"
color_echo "$Cyan" " Skip Hardlinks: ${skip_hardlinks_flag}"
# count files
if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
file_count=$(find "${root_path}" -type f -links 1 | wc -l)
else
file_count=$(find "${root_path}" -type f | wc -l)
fi
file_count=$(find "${root_path}" -type f | wc -l)
color_echo "$Cyan" " File count: ${file_count}"
# create db file
@@ -251,13 +219,7 @@ if [ "${passes_flag}" -ge 1 ]; then
fi
# recursively scan through files and execute "rebalance" procedure
# in the case of --skip-hardlinks, only find files with links == 1
if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then
find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done
else
find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done
fi
find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done
echo ""
echo ""
color_echo "$Green" "Done!"