Initial version

This commit is contained in:
Philipp Woelfel 2025-11-15 19:56:48 -07:00
commit 5106197dc6
3 changed files with 444 additions and 0 deletions

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Philipp Woelfel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

45
README.md Normal file
View File

@ -0,0 +1,45 @@
# ZFS Snapshot Checker
A script that checks ZFS snapshot counts and the age of the oldest "most recent" snapshot. Specifically, the script iterates over the specified datasets, determines for each one the age of its most recent snapshot (with a specified prefix), and then computes the maximum of those ages. This can be used to monitor ZFS snapshotting and replication tools such as [zrepl](https://zrepl.github.io) and [sanoid/syncoid](https://github.com/jimsalterjrs/sanoid/wiki). In addition, the script determines the number of snapshots for each specified dataset, as well as the maximum of those numbers, which makes it possible to monitor whether snapshot pruning works properly.
## Examples
```bash
# Basic check
./checkSnapshots.sh -d tank/data
# Recursive with verbose output
./checkSnapshots.sh -d tank/data -r -v
# Provide limits (max 20 snapshots, max age 24h); the script exits with code 1 if a limit is exceeded.
./checkSnapshots.sh -d tank/data -s 20 -m 24h
```
## Options
```bash
-d, --dataset <dataset> Dataset to check (required, repeat for multiple)
-e, --exclude <dataset> Exclude dataset (repeat for multiple)
-p, --prefix <prefix> Snapshot prefix (default: zrepl_)
-f, --format <format> Timestamp format (see "Timestamp Formats" below)
-s, --max-snapshots <num> Max snapshots per dataset
-m, --max-age <duration> Max age (e.g., 31s, 17m, 24h, 2d)
-r, --recursive Include sub-datasets
-v, --verbose Show detailed table
-n, --err-for-no-snapshots Error if no snapshots found
```
## Timestamp Formats (`-f`)
- `zrepl-dense` - `20060102_150405_000`
- `zrepl-human` - `2006-01-02_15:04:05`
- `iso-8601` - `2006-01-02T15:04:05Z`
- `unix-seconds` - Unix epoch
- `sanoid` - `2006-01-02_15:04:05_<type>` (default sanoid format)
Without this flag, the ZFS creation time of each snapshot is used.
## License
MIT License - see [LICENSE](LICENSE)

378
checkSnapshots.sh Executable file
View File

@ -0,0 +1,378 @@
#!/usr/bin/env bash
# Checks ZFS datasets for snapshots with a specified prefix and determines the maximum
# number of snapshots any dataset has, as well as the age of the oldest "most recent"
# snapshot among all datasets.
# Defaults for every setting that the command-line options can override.

# Dataset selection
declare -a DATASETS=()          # filled by -d/--dataset
declare -a EXCLUDE_DATASETS=()  # filled by -e/--exclude
RECURSIVE=false                 # set to true by -r/--recursive

# Snapshot selection and timestamp source
PREFIX="zrepl_"                 # -p/--prefix
FORMAT=""                       # -f/--format; empty: use the ZFS creation time

# Limits; an empty string means the limit is not enforced
MAX_SNAPSHOTS=""                # -s/--max-snapshots
MAX_DURATION=""                 # -m/--max-age, as typed by the user
MAX_DURATION_SECONDS=0          # -m/--max-age, converted to seconds

# Reporting
VERBOSE=false                   # -v/--verbose
ERR_FOR_NO_SNAPSHOTS=false      # -n/--err-for-no-snapshots
# Print the command-line synopsis and the option summary to stdout, then
# terminate the script with exit status 1.
# The synopsis lists every option the parser accepts, including the long form
# --max-age and -h/--help.
usage() {
  printf '%s\n' \
    "Usage: $0 -d|--dataset <dataset> [-d|--dataset <dataset>...] [-p|--prefix <prefix>] [-f|--format <format>] [-m|--max-age <duration>] [-s|--max-snapshots <num>] [-e|--exclude <dataset>] [-r|--recursive] [-v|--verbose] [-n|--err-for-no-snapshots] [-h|--help]" \
    " -d, --dataset ZFS dataset to check (can be specified multiple times)" \
    " -e, --exclude Dataset to exclude (can be specified multiple times)" \
    " -p, --prefix Snapshot prefix (default: zrepl_)" \
    " -f, --format Timestamp format: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid" \
    " -m, --max-age Maximum age (e.g., 31s, 17m, 24h, 2d)" \
    " -s, --max-snapshots Maximum allowed snapshots per dataset" \
    " -r, --recursive Include all sub-datasets of specified datasets" \
    " -v, --verbose Print per-dataset snapshot count and age" \
    " -n, --err-for-no-snapshots Return error if any dataset has no snapshots" \
    " -h, --help Show this help"
  exit 1
}
# Convert a duration such as "31s", "17m", "24h" or "2d" to seconds.
# Arguments: $1 - non-negative integer immediately followed by one unit
#                 letter: s (seconds), m (minutes), h (hours) or d (days)
# Outputs:   the number of seconds on stdout
# Exits:     with status 1 and a message on stderr if the unit or the number
#            is invalid. Inside $(...) this ends only the command-substitution
#            subshell, so callers have to check the status themselves.
duration_to_seconds() {
  local duration="$1"
  local number="${duration%?}"
  local unit="${duration: -1}"
  local multiplier

  case "$unit" in
    s) multiplier=1 ;;
    m) multiplier=60 ;;
    h) multiplier=3600 ;;
    d) multiplier=86400 ;;
    *)
      echo "Invalid duration unit: $unit" >&2
      exit 1
      ;;
  esac

  # Reject anything that is not a plain decimal number. Without this check a
  # value like "abch" would be evaluated as an (unset) variable and silently
  # become 0, and arbitrary text would reach arithmetic evaluation.
  if ! [[ "$number" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration value: $duration" >&2
    exit 1
  fi

  # 10# forces base 10 so that leading zeros ("08h") are not read as octal.
  echo $((10#$number * multiplier))
}
# timestamp_to_epoch() {
# # Format: YYYYMMDD_HHMMSS_mmm
# local timestamp="$1"
# local year="${timestamp:0:4}"
# local month="${timestamp:4:2}"
# local day="${timestamp:6:2}"
# local hour="${timestamp:9:2}"
# local minute="${timestamp:11:2}"
# local second="${timestamp:13:2}"
# # Convert to epoch using date command (assuming UTC)
# date -u -d "${year}-${month}-${day} ${hour}:${minute}:${second}" +%s 2>/dev/null
# }
# Format an age given in seconds as a human-readable string such as
# "1d 2h 3m 4s". Components that are zero are omitted; an age of zero is
# printed as "0s".
# Arguments: $1 - age in seconds (integer). A negative value, which occurs
#                 when a snapshot timestamp lies in the future, is printed
#                 as the formatted absolute value with a leading "-".
# Outputs:   the formatted age on stdout
format_age() {
  local total_seconds="$1"
  local sign=""

  # Work on the absolute value; otherwise every component would be negative
  # or zero and only the (negative) seconds remainder would be printed.
  if ((total_seconds < 0)); then
    sign="-"
    total_seconds=$((-total_seconds))
  fi

  local days=$((total_seconds / 86400))
  local hours=$(((total_seconds % 86400) / 3600))
  local minutes=$(((total_seconds % 3600) / 60))
  local seconds=$((total_seconds % 60))

  local parts=()
  if ((days > 0)); then parts+=("${days}d"); fi
  if ((hours > 0)); then parts+=("${hours}h"); fi
  if ((minutes > 0)); then parts+=("${minutes}m"); fi
  # Always print the seconds when nothing else was printed, so 0 gives "0s".
  if ((seconds > 0 || ${#parts[@]} == 0)); then parts+=("${seconds}s"); fi

  local IFS=' ' # "${parts[*]}" joins with the first character of IFS
  echo "${sign}${parts[*]}"
}
# Derive the epoch time (seconds) of a snapshot from the timestamp embedded in
# its name.
# Globals:   PREFIX (read) - snapshot-name prefix that precedes the timestamp
# Arguments: $1 - full snapshot name ("dataset@snapshot")
#            $2 - timestamp format: zrepl-dense, zrepl-human, iso-8601,
#                 unix-seconds or sanoid
# Outputs:   the epoch seconds on stdout; nothing if the name does not match
#            the format or the date cannot be converted
# Note:      conversion uses `date -u -d`, so all timestamps are read as UTC.
#            NOTE(review): -d with a date string is GNU date syntax, and sanoid
#            may write local time rather than UTC - confirm both.
parse_snapshot_time() {
  local snapshot_name="$1"
  local format="$2"

  # Keep only the part after the '@' (the snapshot name itself).
  local snap_part="${snapshot_name##*@}"

  # Strip the prefix if present. Quoting "$PREFIX" inside the expansion makes
  # it a literal string; unquoted it would act as a glob pattern, so a prefix
  # containing characters such as '*', '?', '[' or '+' would not be removed
  # correctly.
  if [[ -n "$PREFIX" ]]; then
    snap_part="${snap_part#"$PREFIX"}"
  fi

  local parsed_date=""
  case "$format" in
    zrepl-dense)
      # Format: YYYYMMDD_HHMMSS_mmm (milliseconds are ignored)
      if [[ "$snap_part" =~ ^([0-9]{8})_([0-9]{6})_([0-9]{3})$ ]]; then
        local date_part="${BASH_REMATCH[1]}"
        local time_part="${BASH_REMATCH[2]}"
        parsed_date="${date_part:0:4}-${date_part:4:2}-${date_part:6:2} ${time_part:0:2}:${time_part:2:2}:${time_part:4:2}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
    zrepl-human)
      # Format: YYYY-MM-DD_HH:MM:SS
      if [[ "$snap_part" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2})$ ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
    iso-8601)
      # Passed to date unchanged; date decides whether it is valid.
      date -u -d "$snap_part" +%s 2>/dev/null
      ;;
    unix-seconds)
      # Format: Unix epoch timestamp
      if [[ "$snap_part" =~ ^([0-9]+)$ ]]; then
        echo "${BASH_REMATCH[1]}"
        return 0
      fi
      ;;
    sanoid)
      # Format: YYYY-MM-DD_HH:MM:SS_<type>; the pattern is deliberately not
      # anchored so that the trailing _<type> is ignored.
      if [[ "$snap_part" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2}) ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
  esac
}
# Ask ZFS for the "creation" property of one snapshot.
# Arguments: $1 - full snapshot name
# Outputs:   whatever `zfs get -H -p -o value creation` prints for the
#            snapshot; error output of zfs is discarded
# Returns:   the exit status of zfs
get_snapshot_creation_time() {
  local snap=$1
  local -a query=(get -H -p -o value creation)
  zfs "${query[@]}" "$snap" 2>/dev/null
}
# List the datasets to process for one dataset given on the command line.
# Globals:   RECURSIVE (read) - "true" to include all descendants
# Arguments: $1 - dataset name
# Outputs:   one dataset name per line on stdout; warnings on stderr
# Returns:   1 if the dataset does not exist or listing its descendants fails
get_all_datasets() {
  local dataset="$1"

  if ! zfs list "$dataset" >/dev/null 2>&1; then
    echo "Warning: Dataset $dataset does not exist" >&2
    return 1
  fi

  if [[ "$RECURSIVE" == "true" ]]; then
    # List volumes as well as filesystems: both can carry snapshots, and
    # listing only filesystems would silently leave volumes below the given
    # dataset unchecked.
    zfs list -H -r -t filesystem,volume -o name "$dataset" 2>/dev/null || {
      echo "Warning: Failed to list sub-datasets for $dataset" >&2
      return 1
    }
  else
    echo "$dataset"
  fi
}
# Parse the command-line options into the configuration globals. Options that
# take a value consume two arguments; a missing or empty value is an error.
while [[ $# -gt 0 ]]; do
  case $1 in
    -d|--dataset)
      if [[ -z "$2" ]]; then
        echo "Error: Dataset name required after $1" >&2
        exit 1
      fi
      DATASETS+=("$2")
      shift 2
      ;;
    -e|--exclude)
      if [[ -z "$2" ]]; then
        echo "Error: Dataset name required after $1" >&2
        exit 1
      fi
      EXCLUDE_DATASETS+=("$2")
      shift 2
      ;;
    -p|--prefix)
      if [[ -z "$2" ]]; then
        echo "Error: Prefix required after $1" >&2
        exit 1
      fi
      PREFIX="$2"
      shift 2
      ;;
    -f|--format)
      if [[ -z "$2" ]]; then
        echo "Error: Format name required after $1" >&2
        exit 1
      fi
      case "$2" in
        zrepl-dense|zrepl-human|iso-8601|unix-seconds|sanoid)
          FORMAT="$2"
          ;;
        *)
          echo "Error: Invalid format '$2'. Must be one of: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid" >&2
          exit 1
          ;;
      esac
      shift 2
      ;;
    -m|--max-age)
      if [[ -z "$2" ]]; then
        echo "Error: Duration required after $1" >&2
        exit 1
      fi
      # Validate the shape here so that a malformed duration can never reach
      # the age comparison at the end of the script as an empty or
      # non-numeric limit.
      if ! [[ "$2" =~ ^[0-9]+[smhd]$ ]]; then
        echo "Error: Invalid duration '$2'. Must be a non-negative integer followed by s, m, h or d (e.g., 31s, 17m, 24h)" >&2
        exit 1
      fi
      MAX_DURATION="$2"
      # duration_to_seconds runs in a command-substitution subshell, so an
      # `exit` inside it ends only that subshell; propagate the failure.
      MAX_DURATION_SECONDS=$(duration_to_seconds "$2") || exit 1
      shift 2
      ;;
    -s|--max-snapshots)
      if [[ -z "$2" ]]; then
        echo "Error: Number required after $1" >&2
        exit 1
      fi
      if ! [[ "$2" =~ ^[0-9]+$ ]]; then
        echo "Error: Max snapshots must be a positive integer" >&2
        exit 1
      fi
      MAX_SNAPSHOTS="$2"
      shift 2
      ;;
    -r|--recursive)
      RECURSIVE=true
      shift
      ;;
    -v|--verbose)
      VERBOSE=true
      shift
      ;;
    -n|--err-for-no-snapshots)
      ERR_FOR_NO_SNAPSHOTS=true
      shift
      ;;
    -h|--help)
      usage
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      ;;
  esac
done

# At least one -d/--dataset is mandatory.
if [[ ${#DATASETS[@]} -eq 0 ]]; then
  echo "Error: At least one dataset must be specified" >&2
  usage
fi
# Aggregates that are filled in while the datasets are scanned.
OLDEST_LATEST_EPOCH=0              # smallest "latest snapshot" epoch seen; 0 = none yet
OLDEST_SNAPSHOT_DATASET=""         # dataset that epoch belongs to
MAX_SNAPSHOT_COUNT=0               # largest per-dataset snapshot count seen
MAX_SNAPSHOT_DATASET=""            # dataset that count belongs to
EXCEEDS_LIMIT=false                # true once a dataset exceeds MAX_SNAPSHOTS
HAS_DATASET_WITHOUT_SNAPSHOTS=false

# Build the work list once: expand every requested dataset (recursively if
# requested), drop exact matches of the exclude list, and remember the longest
# name so that the verbose table can be aligned.
DATASETS_TO_PROCESS=()
MAX_DATASET_NAME_LENGTH=0
for root in "${DATASETS[@]}"; do
  while IFS= read -r candidate; do
    [[ -n "$candidate" ]] || continue

    # Exclusion is an exact string comparison against each -e value.
    skip=false
    for unwanted in "${EXCLUDE_DATASETS[@]}"; do
      if [[ "$candidate" == "$unwanted" ]]; then
        skip=true
        break
      fi
    done
    [[ "$skip" == "true" ]] && continue

    DATASETS_TO_PROCESS+=("$candidate")
    if ((${#candidate} > MAX_DATASET_NAME_LENGTH)); then
      MAX_DATASET_NAME_LENGTH=${#candidate}
    fi
  done < <(get_all_datasets "$root")
done
DATASET_COLUMN_WIDTH=$((MAX_DATASET_NAME_LENGTH + 2))
# Print table header
if [[ "$VERBOSE" == "true" ]]; then
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "Dataset" "Snapshots" "Min Snapshot Age"
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$(printf '%*s' "$DATASET_COLUMN_WIDTH" | tr ' ' '-')" "---------" "----------------"
fi

# Process each dataset from the pre-computed list: count its matching
# snapshots, check the count against the limit, and track the dataset whose
# most recent snapshot is the oldest.
for dataset in "${DATASETS_TO_PROCESS[@]}"; do
  # Snapshots sorted by creation time, oldest first. grep -F matches the
  # prefix as a fixed string; as a regular expression a '.' in the prefix
  # would match any character and select snapshots with a different prefix.
  # `|| true` keeps "no match" from being treated as a failure.
  snapshots=$(zfs list -H -t snapshot -o name -s creation "$dataset" 2>/dev/null | grep -F -- "@${PREFIX}" || true)

  if [[ -z "$snapshots" ]]; then
    HAS_DATASET_WITHOUT_SNAPSHOTS=true
    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "0" "n/a"
    else
      echo "Warning: No snapshots found for dataset $dataset with prefix $PREFIX" >&2
    fi
    continue
  fi

  snapshot_count=$(echo "$snapshots" | wc -l)
  if [[ $snapshot_count -gt $MAX_SNAPSHOT_COUNT ]]; then
    MAX_SNAPSHOT_COUNT=$snapshot_count
    MAX_SNAPSHOT_DATASET="$dataset"
  fi
  if [[ -n "$MAX_SNAPSHOTS" && $snapshot_count -gt $MAX_SNAPSHOTS ]]; then
    EXCEEDS_LIMIT=true
    echo "Error: Dataset $dataset has $snapshot_count snapshots (exceeds limit of $MAX_SNAPSHOTS)" >&2
  fi

  # The list is sorted by creation time, so the last line is the newest.
  latest_snapshot=$(echo "$snapshots" | tail -n 1)
  if [[ -n "$latest_snapshot" ]]; then
    # With -f the time comes from the snapshot name, otherwise from ZFS.
    if [[ -n "$FORMAT" ]]; then
      epoch=$(parse_snapshot_time "$latest_snapshot" "$FORMAT")
    else
      epoch=$(get_snapshot_creation_time "$latest_snapshot")
    fi

    if [[ -n "$epoch" && "$epoch" -gt 0 ]]; then
      dataset_age_seconds=$(($(date +%s) - epoch))
      dataset_age_formatted=$(format_age "$dataset_age_seconds")
      if [[ "$VERBOSE" == "true" ]]; then
        printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "$snapshot_count" "$dataset_age_formatted"
      fi
      # 0 means "nothing recorded yet"; otherwise keep the smallest epoch.
      if [[ $OLDEST_LATEST_EPOCH -eq 0 || $epoch -lt $OLDEST_LATEST_EPOCH ]]; then
        OLDEST_LATEST_EPOCH=$epoch
        OLDEST_SNAPSHOT_DATASET="$dataset"
      fi
    else
      echo "Warning: Could not determine snapshot age for $latest_snapshot" >&2
    fi
  fi
done
# Without a single usable snapshot timestamp there is nothing to report.
if [[ $OLDEST_LATEST_EPOCH -eq 0 ]]; then
  echo "Error: No valid snapshots found for any dataset" >&2
  exit 1
fi

# Age of the oldest "most recent" snapshot across all datasets.
CURRENT_EPOCH=$(date -u +%s)
TIME_DIFF_SECONDS=$((CURRENT_EPOCH - OLDEST_LATEST_EPOCH))
TIME_DIFF_FORMATTED=$(format_age "$TIME_DIFF_SECONDS")

if [[ "$VERBOSE" == "true" ]]; then
  echo "" # blank line between the table and the summary
fi
echo "Max snapshots: $MAX_SNAPSHOT_COUNT ($MAX_SNAPSHOT_DATASET)"
echo "Oldest most recent snapshot age: $TIME_DIFF_FORMATTED ($OLDEST_SNAPSHOT_DATASET)"

# Exit status: 1 if any configured limit was violated, 0 otherwise.
EXIT_CODE=0
if [[ "$EXCEEDS_LIMIT" == "true" ]]; then
  EXIT_CODE=1
fi
if [[ "$ERR_FOR_NO_SNAPSHOTS" == "true" && "$HAS_DATASET_WITHOUT_SNAPSHOTS" == "true" ]]; then
  EXIT_CODE=1
fi
if [[ -n "$MAX_DURATION" ]]; then
  if [[ $TIME_DIFF_SECONDS -gt $MAX_DURATION_SECONDS ]]; then
    # Report the violation on stderr, as is done for the snapshot-count
    # limit, so that a non-zero exit status is never unexplained.
    echo "Error: Oldest most recent snapshot age $TIME_DIFF_FORMATTED ($OLDEST_SNAPSHOT_DATASET) exceeds limit of $MAX_DURATION" >&2
    EXIT_CODE=1
  fi
fi
exit "$EXIT_CODE"