zfsSnapshotChecker/checkSnapshots.sh

366 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
# Checks ZFS datasets for snapshots with a specified prefix and determines the maximum
# number of snapshots any dataset has, as well as the age of the oldest "most recent"
# snapshot among all datasets.
# Built-in defaults. Each of these is overwritten by the matching
# command-line option when that option is given.

# Which datasets and snapshots to look at
declare -a DATASETS=()          # -d / --dataset (repeatable)
declare -a EXCLUDE_DATASETS=()  # -e / --exclude (repeatable)
PREFIX='zrepl_'                 # -p / --prefix
RECURSIVE=false                 # -r / --recursive
FORMAT=''                       # -f / --format; empty = ask ZFS for the creation time

# Thresholds that turn a finding into a non-zero exit status
MAX_DURATION=''                 # -m / --max-age, exactly as typed by the user
MAX_DURATION_SECONDS=0          # the same threshold converted to seconds
MAX_SNAPSHOTS=''                # -s / --max-snapshots; empty = no limit
ERR_FOR_NO_SNAPSHOTS=false      # -n / --err-for-no-snapshots

# Output control
VERBOSE=false                   # -v / --verbose
# Print the command-line help to stdout and terminate the script.
# Always exits with status 1, whether reached via -h/--help or via a
# usage error, so callers never regain control after calling it.
usage() {
  # Unquoted delimiter on purpose: $0 must expand to the script name.
  cat <<EOF
Usage: $0 -d|--dataset <dataset> [-d|--dataset <dataset>...] [-p|--prefix <prefix>] [-f|--format <format>] [-m|--max-age <duration>] [-s|--max-snapshots <num>] [-e|--exclude <dataset>] [-r|--recursive] [-v|--verbose] [-n|--err-for-no-snapshots] [-h|--help]
 -d, --dataset ZFS dataset to check (can be specified multiple times)
 -e, --exclude Dataset to exclude (can be specified multiple times)
 -p, --prefix Snapshot prefix (default: zrepl_)
 -f, --format Timestamp format: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid
 -m, --max-age Maximum age (e.g., 31s, 17m, 24h, 7d)
 -s, --max-snapshots Maximum allowed snapshots per dataset
 -r, --recursive Include all sub-datasets of specified datasets
 -v, --verbose Print per-dataset snapshot count and age
 -n, --err-for-no-snapshots Return error if any dataset has no snapshots
 -h, --help Show this help message
EOF
  exit 1
}
# Convert a duration such as "31s", "17m", "24h" or "7d" into seconds.
# Arguments: $1 - non-negative integer immediately followed by one unit
#                 character: s (seconds), m (minutes), h (hours), d (days)
# Outputs:   the number of seconds on stdout; a diagnostic on stderr when
#            the input is rejected
# Exits:     with status 1 on invalid input. The function is meant to be
#            called inside $(...), where this only ends the subshell, so
#            the caller has to check the substitution's exit status.
duration_to_seconds() {
  local duration="$1"
  local number="${duration%?}"   # everything except the last character
  local unit="${duration: -1}"   # the last character
  local factor

  case "$unit" in
    s) factor=1 ;;
    m) factor=60 ;;
    h) factor=3600 ;;
    d) factor=86400 ;;
    *) echo "Invalid duration unit: $unit" >&2; exit 1 ;;
  esac

  # Reject anything that is not plain digits before it reaches arithmetic
  # evaluation, where "x" would silently become 0 and "1.5" a syntax error.
  if [[ ! "$number" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration value: $duration" >&2
    exit 1
  fi

  # 10# forces base 10 so "08" or "09" are not rejected as invalid octal.
  echo $((10#$number * factor))
}
# Format an age in seconds as a compact human-readable string.
# Arguments: $1 - age in seconds (integer, may be negative)
# Outputs:   space-separated components such as "1d 2h 3m 4s" on stdout.
#            Components that are zero are omitted; an age of zero prints
#            "0s". A negative age (latest snapshot timestamp lies in the
#            future) is formatted from its magnitude with a leading "-".
format_age() {
  local total_seconds="$1"
  local sign=""

  # Work on the magnitude: with a negative value every component would be
  # <= 0 and all but the seconds remainder would be silently dropped.
  if ((total_seconds < 0)); then
    sign="-"
    total_seconds=$((-total_seconds))
  fi

  local days=$((total_seconds / 86400))
  local hours=$(((total_seconds % 86400) / 3600))
  local minutes=$(((total_seconds % 3600) / 60))
  local seconds=$((total_seconds % 60))

  local -a parts=()
  if ((days > 0)); then parts+=("${days}d"); fi
  if ((hours > 0)); then parts+=("${hours}h"); fi
  if ((minutes > 0)); then parts+=("${minutes}m"); fi
  # Always emit something: fall back to seconds when nothing else was added.
  if ((seconds > 0 || ${#parts[@]} == 0)); then parts+=("${seconds}s"); fi

  # "${parts[*]}" joins on the first character of IFS; pin it to a space.
  local IFS=' '
  echo "${sign}${parts[*]}"
}
# Parse the timestamp embedded in a snapshot name and print it as epoch
# seconds.
# Globals:   PREFIX (read) - literal snapshot-name prefix to strip first
# Arguments: $1 - snapshot name; only the part after the last "@" is used
#            $2 - timestamp format: zrepl-dense, zrepl-human, iso-8601,
#                 unix-seconds or sanoid
# Outputs:   epoch seconds on stdout; nothing when the name does not match
#            the format or `date` cannot parse it (errors from `date` are
#            discarded on purpose, the caller tests for empty output)
# Note:      timestamps are interpreted as UTC (date -u); `date -d` is used
#            for the conversion.
parse_snapshot_time() {
  local snapshot_name="$1"
  local format="$2"

  # Extract the part after @ (snapshot name)
  local snap_part="${snapshot_name##*@}"

  # Strip the prefix as a literal string. Quoting it keeps characters such
  # as ".", "+" or "*" from being interpreted as regex or glob operators.
  if [[ -n "$PREFIX" && "$snap_part" == "$PREFIX"* ]]; then
    snap_part=${snap_part#"$PREFIX"}
  fi

  local parsed_date=""
  case "$format" in
    zrepl-dense)
      # Format: YYYYMMDD_HHMMSS_mmm (the trailing 3-digit field is ignored)
      if [[ "$snap_part" =~ ^([0-9]{8})_([0-9]{6})_([0-9]{3})$ ]]; then
        local date_part="${BASH_REMATCH[1]}"
        local time_part="${BASH_REMATCH[2]}"
        parsed_date="${date_part:0:4}-${date_part:4:2}-${date_part:6:2} ${time_part:0:2}:${time_part:2:2}:${time_part:4:2}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
    zrepl-human)
      # Format: YYYY-MM-DD_HH:MM:SS
      if [[ "$snap_part" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2})$ ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
    iso-8601)
      # Passed to date as-is; date decides whether it is parseable.
      date -u -d "$snap_part" +%s 2>/dev/null
      ;;
    unix-seconds)
      # Format: Unix epoch timestamp
      if [[ "$snap_part" =~ ^([0-9]+)$ ]]; then
        echo "${BASH_REMATCH[1]}"
        return 0
      fi
      ;;
    sanoid)
      # Format: YYYY-MM-DD_HH:MM:SS_<type>
      # Deliberately unanchored: the timestamp is followed by a type suffix.
      if [[ "$snap_part" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2}) ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
  esac
}
# Ask ZFS for the "creation" property of a snapshot.
# Arguments: $1 - full snapshot name
# Outputs:   whatever `zfs get -H -p -o value creation` prints for it
#            (only the value column is requested); zfs' stderr is discarded
# Returns:   the exit status of zfs
get_snapshot_creation_time() {
  local -a query=(get -H -p -o value creation "$1")
  zfs "${query[@]}" 2>/dev/null
}
# Expand one user-supplied dataset into the list of datasets to check.
# Globals:   RECURSIVE (read) - "true" to include descendant filesystems
# Arguments: $1 - dataset name
# Outputs:   one dataset name per line on stdout; warnings on stderr
# Returns:   1 if the dataset cannot be listed at all or the recursive
#            listing fails, otherwise 0
get_all_datasets() {
  local root="$1"

  # Existence probe: only the exit status of `zfs list` matters here.
  zfs list "$root" >/dev/null 2>&1 || {
    echo "Warning: Dataset $root does not exist" >&2
    return 1
  }

  # Non-recursive mode: the dataset stands for itself.
  if [[ "$RECURSIVE" != "true" ]]; then
    echo "$root"
    return
  fi

  # Recursive mode: the dataset plus every filesystem below it.
  if ! zfs list -H -r -t filesystem -o name "$root" 2>/dev/null; then
    echo "Warning: Failed to list sub-datasets for $root" >&2
    return 1
  fi
}
# Parse the command line into the configuration globals.
# Every option that takes a value rejects a missing or empty value; unknown
# options print the usage text and terminate the script.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -d|--dataset)
      if [[ -z "$2" ]]; then
        echo "Error: Dataset name required after $1" >&2
        exit 1
      fi
      DATASETS+=("$2")
      shift 2
      ;;
    -e|--exclude)
      if [[ -z "$2" ]]; then
        echo "Error: Dataset name required after $1" >&2
        exit 1
      fi
      EXCLUDE_DATASETS+=("$2")
      shift 2
      ;;
    -p|--prefix)
      if [[ -z "$2" ]]; then
        echo "Error: Prefix required after $1" >&2
        exit 1
      fi
      PREFIX="$2"
      shift 2
      ;;
    -f|--format)
      if [[ -z "$2" ]]; then
        echo "Error: Format name required after $1" >&2
        exit 1
      fi
      case "$2" in
        zrepl-dense|zrepl-human|iso-8601|unix-seconds|sanoid)
          FORMAT="$2"
          ;;
        *)
          echo "Error: Invalid format '$2'. Must be one of: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid" >&2
          exit 1
          ;;
      esac
      shift 2
      ;;
    -m|--max-age)
      if [[ -z "$2" ]]; then
        echo "Error: Duration required after $1" >&2
        exit 1
      fi
      MAX_DURATION="$2"
      # The conversion runs in a command-substitution subshell, so a failure
      # inside duration_to_seconds cannot stop this script on its own.
      # Abort here instead of scanning everything with an empty threshold.
      MAX_DURATION_SECONDS=$(duration_to_seconds "$2") || exit 1
      shift 2
      ;;
    -s|--max-snapshots)
      if [[ -z "$2" ]]; then
        echo "Error: Number required after $1" >&2
        exit 1
      fi
      if ! [[ "$2" =~ ^[0-9]+$ ]]; then
        echo "Error: Max snapshots must be a positive integer" >&2
        exit 1
      fi
      # Normalise to base 10: a value such as "08" would otherwise be read
      # as invalid octal by the numeric comparison against the snapshot count.
      MAX_SNAPSHOTS=$((10#$2))
      shift 2
      ;;
    -r|--recursive)
      RECURSIVE=true
      shift
      ;;
    -v|--verbose)
      VERBOSE=true
      shift
      ;;
    -n|--err-for-no-snapshots)
      ERR_FOR_NO_SNAPSHOTS=true
      shift
      ;;
    -h|--help)
      usage
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      ;;
  esac
done

# At least one -d/--dataset is mandatory; there is no default dataset.
if [[ ${#DATASETS[@]} -eq 0 ]]; then
  echo "Error: At least one dataset must be specified" >&2
  usage
fi
# Accumulators filled in while the datasets are being processed.
OLDEST_LATEST_EPOCH=0                # smallest "latest snapshot" epoch seen; 0 = none yet
OLDEST_SNAPSHOT_DATASET=""           # dataset that epoch belongs to
MAX_SNAPSHOT_COUNT=0                 # largest per-dataset snapshot count seen
MAX_SNAPSHOT_DATASET=""              # dataset that count belongs to
EXCEEDS_LIMIT=false                  # some dataset is over the snapshot limit
HAS_DATASET_WITHOUT_SNAPSHOTS=false  # some dataset has no matching snapshot

# Build the work list once: expand every requested dataset, drop the ones
# that exactly match an --exclude entry, and remember the longest name.
DATASETS_TO_PROCESS=()
MAX_DATASET_NAME_LENGTH=0
for root in "${DATASETS[@]}"; do
  while IFS= read -r candidate; do
    # Ignore blank lines in the listing.
    [[ -n "$candidate" ]] || continue

    # Exclusions are literal, whole-name comparisons.
    skip=false
    for excluded_name in "${EXCLUDE_DATASETS[@]}"; do
      if [[ "$candidate" == "$excluded_name" ]]; then
        skip=true
        break
      fi
    done
    [[ "$skip" == "true" ]] && continue

    DATASETS_TO_PROCESS+=("$candidate")
    if ((${#candidate} > MAX_DATASET_NAME_LENGTH)); then
      MAX_DATASET_NAME_LENGTH=${#candidate}
    fi
  done < <(get_all_datasets "$root")
done
# The dataset column is as wide as the longest dataset name plus two.
DATASET_COLUMN_WIDTH=$((MAX_DATASET_NAME_LENGTH + 2))

# Verbose mode prints a table; emit its title row and a dashed rule first.
if [[ "$VERBOSE" == "true" ]]; then
  row_format="%-${DATASET_COLUMN_WIDTH}s %10s %20s\n"
  # Build a run of dashes exactly as wide as the dataset column: pad an
  # empty string to that width, then turn every space into a dash.
  printf -v dataset_rule '%*s' "$DATASET_COLUMN_WIDTH" ''
  dataset_rule="${dataset_rule// /-}"
  printf "$row_format" "Dataset" "Snapshots" "Min Snapshot Age"
  printf "$row_format" "$dataset_rule" "---------" "----------------"
fi
# Process each dataset from the pre-computed list: count its matching
# snapshots, enforce the snapshot limit, and track the age of its most
# recent snapshot.
for dataset in "${DATASETS_TO_PROCESS[@]}"; do
  # Snapshots are requested sorted by creation, so the last line is the
  # newest. grep -F matches the prefix as a literal string (a prefix such as
  # "auto." must not act as a regex); "|| true" keeps "no match" from
  # counting as a failure.
  snapshots=$(zfs list -H -t snapshot -o name -s creation "$dataset" 2>/dev/null | grep -F -- "@${PREFIX}" || true)
  if [[ -z "$snapshots" ]]; then
    HAS_DATASET_WITHOUT_SNAPSHOTS=true
    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "0" "n/a"
    else
      echo "Warning: No snapshots found for dataset $dataset with prefix $PREFIX" >&2
    fi
    continue
  fi

  # grep -c prints a bare number; some wc implementations pad the count
  # with spaces, which would end up inside the messages below.
  snapshot_count=$(grep -c '' <<<"$snapshots")
  if [[ $snapshot_count -gt $MAX_SNAPSHOT_COUNT ]]; then
    MAX_SNAPSHOT_COUNT=$snapshot_count
    MAX_SNAPSHOT_DATASET="$dataset"
  fi
  if [[ -n "$MAX_SNAPSHOTS" && $snapshot_count -gt $MAX_SNAPSHOTS ]]; then
    EXCEEDS_LIMIT=true
    echo "Error: Dataset $dataset has $snapshot_count snapshots (exceeds limit of $MAX_SNAPSHOTS)" >&2
  fi

  latest_snapshot=$(echo "$snapshots" | tail -n 1)
  if [[ -n "$latest_snapshot" ]]; then
    # With --format the timestamp is parsed from the snapshot name;
    # otherwise ZFS is asked for the snapshot's creation time.
    if [[ -n "$FORMAT" ]]; then
      epoch=$(parse_snapshot_time "$latest_snapshot" "$FORMAT")
    else
      epoch=$(get_snapshot_creation_time "$latest_snapshot")
    fi
    if [[ -n "$epoch" && "$epoch" -gt 0 ]]; then
      dataset_age_seconds=$(($(date +%s) - epoch))
      dataset_age_formatted=$(format_age "$dataset_age_seconds")
      if [[ "$VERBOSE" == "true" ]]; then
        printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "$snapshot_count" "$dataset_age_formatted"
      fi
      # Keep the smallest epoch, i.e. the dataset whose newest snapshot is
      # the oldest of all; 0 means nothing has been recorded yet.
      if [[ $OLDEST_LATEST_EPOCH -eq 0 || $epoch -lt $OLDEST_LATEST_EPOCH ]]; then
        OLDEST_LATEST_EPOCH=$epoch
        OLDEST_SNAPSHOT_DATASET="$dataset"
      fi
    else
      echo "Warning: Could not determine snapshot age for $latest_snapshot" >&2
    fi
  fi
done
# Final report and exit status.
# An epoch of 0 means no dataset yielded a usable "latest snapshot"
# timestamp, so there is nothing to summarise.
if [[ $OLDEST_LATEST_EPOCH -eq 0 ]]; then
  echo "Error: No valid snapshots found for any dataset" >&2
  exit 1
fi

CURRENT_EPOCH=$(date -u +%s)
TIME_DIFF_SECONDS=$((CURRENT_EPOCH - OLDEST_LATEST_EPOCH))
TIME_DIFF_FORMATTED=$(format_age "$TIME_DIFF_SECONDS")

# Blank line between the verbose table and the summary.
if [[ "$VERBOSE" == "true" ]]; then
  echo ""
fi
echo "Datasets processed: ${#DATASETS_TO_PROCESS[@]}"
echo "Max snapshots: $MAX_SNAPSHOT_COUNT ($MAX_SNAPSHOT_DATASET)"
echo "Oldest most recent snapshot age: $TIME_DIFF_FORMATTED ($OLDEST_SNAPSHOT_DATASET)"

# Exit 1 if any enabled check failed, 0 otherwise.
EXIT_CODE=0
if [[ "$EXCEEDS_LIMIT" == "true" ]]; then
  EXIT_CODE=1
fi
if [[ "$ERR_FOR_NO_SNAPSHOTS" == "true" && "$HAS_DATASET_WITHOUT_SNAPSHOTS" == "true" ]]; then
  EXIT_CODE=1
fi
if [[ -n "$MAX_DURATION" && $TIME_DIFF_SECONDS -gt $MAX_DURATION_SECONDS ]]; then
  # Say why the check fails, as the snapshot-limit check does; otherwise
  # the only trace of this failure is the exit status.
  echo "Error: Most recent snapshot of $OLDEST_SNAPSHOT_DATASET is $TIME_DIFF_FORMATTED old (exceeds max age of $MAX_DURATION)" >&2
  EXIT_CODE=1
fi
exit "$EXIT_CODE"