Initial version
This commit is contained in:
commit
5106197dc6
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Philipp Woelfel
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
45
README.md
Normal file
45
README.md
Normal file
@ -0,0 +1,45 @@
|
||||
# ZFS Snapshot Checker
|
||||
|
||||
Script to check ZFS snapshot counts and the age of the oldest "most recent" snapshot. Specifically, the script iterates over the specified datasets, determines for each one the age of its most recent snapshot (with a specified prefix), and then computes the maximum of those ages, i.e., it finds the dataset whose newest snapshot is the most out of date. This can be used to monitor ZFS snapshotting and replication tools such as [zrepl](https://zrepl.github.io) and [sanoid/syncoid](https://github.com/jimsalterjrs/sanoid/wiki). In addition, the script determines the number of snapshots for each specified dataset, as well as the maximum of those numbers. This makes it possible to verify that snapshots are being pruned properly.
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
# Basic check
|
||||
./checkSnapshots.sh -d tank/data
|
||||
|
||||
# Recursive with verbose output
|
||||
./checkSnapshots.sh -d tank/data -r -v
|
||||
|
||||
# Provide limits (max 20 snapshots, max age 24h); the script exits with code 1 if a limit is exceeded.
|
||||
./checkSnapshots.sh -d tank/data -s 20 -m 24h
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
```bash
|
||||
-d, --dataset <dataset> Dataset to check (required, repeat for multiple)
|
||||
-e, --exclude <dataset> Exclude dataset (repeat for multiple)
|
||||
-p, --prefix <prefix> Snapshot prefix (default: zrepl_)
|
||||
-f, --format <format> Timestamp format (see "Timestamp Formats" below)
|
||||
-s, --max-snapshots <num> Max snapshots per dataset
|
||||
-m, --max-age <duration> Max age (e.g., 31s, 17m, 24h, 2d)
|
||||
-r, --recursive Include sub-datasets
|
||||
-v, --verbose Show detailed table
|
||||
-n, --err-for-no-snapshots Error if no snapshots found
|
||||
```
|
||||
|
||||
## Timestamp Formats (`-f`)
|
||||
|
||||
- `zrepl-dense` - `20060102_150405_000`
|
||||
- `zrepl-human` - `2006-01-02_15:04:05`
|
||||
- `iso-8601` - `2006-01-02T15:04:05Z`
|
||||
- `unix-seconds` - Unix epoch
|
||||
- `sanoid` - default sanoid format
|
||||
|
||||
Without this flag, the snapshot's ZFS creation time is used instead of a timestamp parsed from the snapshot name.
|
||||
|
||||
## License
|
||||
|
||||
MIT License - see [LICENSE](LICENSE)
|
||||
378
checkSnapshots.sh
Executable file
378
checkSnapshots.sh
Executable file
@ -0,0 +1,378 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Checks ZFS datasets for snapshots with a specified prefix and determines the maximum
|
||||
# number of snapshots any dataset has, as well as the age of the oldest "most recent"
|
||||
# snapshot among all datasets.
|
||||
|
||||
# Baseline configuration. Each of these globals can be overridden by the
# command-line flags handled further down.

# Matching: which snapshots and datasets are considered.
PREFIX='zrepl_'
FORMAT=''
DATASETS=()
EXCLUDE_DATASETS=()

# Limits: an empty string means "no limit requested".
MAX_SNAPSHOTS=''
MAX_DURATION=''
MAX_DURATION_SECONDS=0

# Boolean switches, stored as the strings "true" / "false".
RECURSIVE=false
VERBOSE=false
ERR_FOR_NO_SNAPSHOTS=false
|
||||
|
||||
#######################################
# Print the command-line help and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional; defaults to 1 so that
#        argument-less calls keep signalling a usage error)
# Outputs:
#   Help text on stdout.
#######################################
usage() {
  local status="${1:-1}"

  echo "Usage: $0 -d|--dataset <dataset> [-d|--dataset <dataset>...] [-p|--prefix <prefix>] [-f|--format <format>] [-m|--max-age <duration>] [-s|--max-snapshots <num>] [-e|--exclude <dataset>] [-r|--recursive] [-v|--verbose] [-n|--err-for-no-snapshots] [-h|--help]"
  echo " -d, --dataset ZFS dataset to check (can be specified multiple times)"
  echo " -e, --exclude Dataset to exclude (can be specified multiple times)"
  echo " -p, --prefix Snapshot prefix (default: zrepl_)"
  echo " -f, --format Timestamp format: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid"
  # The duration parser also accepts a trailing "d" (days), so advertise it.
  echo " -m, --max-age Maximum age (e.g., 31s, 17m, 24h, 2d)"
  echo " -s, --max-snapshots Maximum allowed snapshots per dataset"
  echo " -r, --recursive Include all sub-datasets of specified datasets"
  echo " -v, --verbose Print per-dataset snapshot count and age"
  echo " -n, --err-for-no-snapshots Return error if any dataset has no snapshots"
  echo " -h, --help Show this help and exit"
  exit "$status"
}
|
||||
|
||||
#######################################
# Convert a duration such as "31s", "17m", "24h" or "2d" into seconds.
# Arguments:
#   $1 - duration: a non-negative decimal integer followed by one unit
#        character (s = seconds, m = minutes, h = hours, d = days)
# Outputs:
#   The number of seconds on stdout; a diagnostic on stderr on failure.
# Returns:
#   0 on success, 1 if the unit or the numeric part is invalid.
#######################################
duration_to_seconds() {
  local duration="$1"
  local number="${duration%?}"
  local unit="${duration: -1}"
  local multiplier

  case "$unit" in
    s) multiplier=1 ;;
    m) multiplier=60 ;;
    h) multiplier=3600 ;;
    d) multiplier=86400 ;;
    *)
      echo "Invalid duration unit: $unit" >&2
      return 1
      ;;
  esac

  # Validate before doing arithmetic: an empty or non-numeric value would
  # otherwise be a syntax error or be evaluated as a variable name.
  if [[ ! "$number" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration value: $duration" >&2
    return 1
  fi

  # Force base 10 so values with leading zeros (e.g. "08m") are not read as octal.
  echo $((10#$number * multiplier))
}
|
||||
|
||||
# timestamp_to_epoch() {
|
||||
# # Format: YYYYMMDD_HHMMSS_mmm
|
||||
# local timestamp="$1"
|
||||
# local year="${timestamp:0:4}"
|
||||
# local month="${timestamp:4:2}"
|
||||
# local day="${timestamp:6:2}"
|
||||
# local hour="${timestamp:9:2}"
|
||||
# local minute="${timestamp:11:2}"
|
||||
# local second="${timestamp:13:2}"
|
||||
|
||||
# # Convert to epoch using date command (assuming UTC)
|
||||
# date -u -d "${year}-${month}-${day} ${hour}:${minute}:${second}" +%s 2>/dev/null
|
||||
# }
|
||||
|
||||
#######################################
# Format an age given in seconds as a compact human-readable string,
# e.g. 90061 -> "1d 1h 1m 1s". Components that are zero are omitted;
# an age of zero is printed as "0s".
# Arguments:
#   $1 - age in seconds (integer; may be negative, e.g. when a snapshot
#        timestamp lies in the future)
# Outputs:
#   The formatted age on stdout. Negative ages are prefixed with "-".
#######################################
format_age() {
  local total_seconds="$1"
  local sign=""

  # Split the magnitude and re-attach the sign afterwards; dividing a negative
  # number directly would produce negative components that fail the "> 0" tests.
  if ((total_seconds < 0)); then
    sign="-"
    total_seconds=$((-total_seconds))
  fi

  local days=$((total_seconds / 86400))
  local hours=$(((total_seconds % 86400) / 3600))
  local minutes=$(((total_seconds % 3600) / 60))
  local seconds=$((total_seconds % 60))

  local -a parts=()
  if ((days > 0)); then parts+=("${days}d"); fi
  if ((hours > 0)); then parts+=("${hours}h"); fi
  if ((minutes > 0)); then parts+=("${minutes}m"); fi
  # Always emit something: fall back to "0s" when every component is zero.
  if ((seconds > 0 || ${#parts[@]} == 0)); then parts+=("${seconds}s"); fi

  local IFS=' ' # "${parts[*]}" joins with the first character of IFS
  echo "${sign}${parts[*]}"
}
|
||||
|
||||
#######################################
# Derive a Unix epoch timestamp from a snapshot's name.
# Globals:
#   PREFIX (read) - literal snapshot-name prefix stripped before parsing
# Arguments:
#   $1 - snapshot name, with or without the leading "dataset@" part
#   $2 - timestamp format: zrepl-dense, zrepl-human, iso-8601,
#        unix-seconds or sanoid
# Outputs:
#   The epoch (seconds) on stdout; nothing if the name cannot be parsed.
# Returns:
#   0 on success, non-zero if the name does not match the format or the
#   date conversion fails.
# Notes:
#   Timestamps are interpreted as UTC (date -u).
#   NOTE(review): confirm that the snapshot tool in use names snapshots in UTC.
#   The conversion relies on `date -d`, which is a GNU coreutils option.
#######################################
parse_snapshot_time() {
  local snapshot_name="$1"
  local format="$2"

  # Keep only the part after "@", then drop the prefix. The prefix is quoted
  # so that it is removed as a literal string, not as a glob or regex pattern.
  local snap_part="${snapshot_name##*@}"
  snap_part=${snap_part#"$PREFIX"}

  local timestamp=""

  case "$format" in
    zrepl-dense)
      # Format: YYYYMMDD_HHMMSS_mmm (milliseconds are ignored)
      if [[ "$snap_part" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})_[0-9]{3}$ ]]; then
        timestamp="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]}"
        timestamp+=" ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}"
      fi
      ;;
    zrepl-human)
      # Format: YYYY-MM-DD_HH:MM:SS
      if [[ "$snap_part" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2})$ ]]; then
        timestamp="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
      fi
      ;;
    iso-8601)
      # Passed to date unchanged.
      timestamp="$snap_part"
      ;;
    unix-seconds)
      # Format: Unix epoch timestamp; already in the output representation.
      if [[ "$snap_part" =~ ^[0-9]+$ ]]; then
        echo "$snap_part"
        return 0
      fi
      ;;
    sanoid)
      # Format: YYYY-MM-DD_HH:MM:SS_<type>; deliberately unanchored so the
      # trailing "_<type>" does not have to be described.
      if [[ "$snap_part" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2}) ]]; then
        timestamp="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
      fi
      ;;
  esac

  # Nothing recognisable: report failure instead of handing an empty string to
  # date, which GNU date would accept and turn into a bogus timestamp.
  [[ -n "$timestamp" ]] || return 1

  date -u -d "$timestamp" +%s 2>/dev/null
}
|
||||
|
||||
#######################################
# Print the value of the ZFS "creation" property of a snapshot.
# Arguments:
#   $1 - full snapshot name (dataset@snapshot)
# Outputs:
#   The property value as printed by `zfs get -H -p -o value`; errors
#   from zfs are discarded.
# Returns:
#   The exit status of zfs.
#######################################
get_snapshot_creation_time() {
  local snap_name=$1
  local -a query=(get -H -p -o value creation)

  zfs "${query[@]}" "$snap_name" 2>/dev/null
}
|
||||
|
||||
#######################################
# Print the datasets to process for one dataset given on the command line.
# Globals:
#   RECURSIVE (read) - "true" to include all descendants of the dataset
# Arguments:
#   $1 - dataset name
# Outputs:
#   One dataset name per line on stdout; warnings on stderr.
# Returns:
#   0 on success, 1 if the dataset does not exist or cannot be listed.
#######################################
get_all_datasets() {
  local dataset="$1"

  if ! zfs list "$dataset" >/dev/null 2>&1; then
    echo "Warning: Dataset $dataset does not exist" >&2
    return 1
  fi

  if [[ "$RECURSIVE" != "true" ]]; then
    echo "$dataset"
    return 0
  fi

  # List volumes as well as filesystems: both can carry snapshots, and the
  # non-recursive path above accepts either. With "-t filesystem" alone, a
  # volume passed together with -r would silently produce no output.
  if ! zfs list -H -r -t filesystem,volume -o name "$dataset" 2>/dev/null; then
    echo "Warning: Failed to list sub-datasets for $dataset" >&2
    return 1
  fi
}
|
||||
|
||||
#######################################
# Parse the command-line arguments into the global configuration variables.
# Globals (written):
#   DATASETS, EXCLUDE_DATASETS, PREFIX, FORMAT, MAX_DURATION,
#   MAX_DURATION_SECONDS, MAX_SNAPSHOTS, RECURSIVE, VERBOSE,
#   ERR_FOR_NO_SNAPSHOTS
# Arguments:
#   The script's command-line arguments ("$@").
# Returns:
#   Does not return on invalid input: prints a diagnostic and exits 1.
#   Exits 0 after printing the help for -h/--help.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -d|--dataset)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Dataset name required after $1" >&2
          exit 1
        fi
        DATASETS+=("$2")
        shift 2
        ;;
      -e|--exclude)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Dataset name required after $1" >&2
          exit 1
        fi
        EXCLUDE_DATASETS+=("$2")
        shift 2
        ;;
      -p|--prefix)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Prefix required after $1" >&2
          exit 1
        fi
        PREFIX="$2"
        shift 2
        ;;
      -f|--format)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Format name required after $1" >&2
          exit 1
        fi
        case "$2" in
          zrepl-dense|zrepl-human|iso-8601|unix-seconds|sanoid)
            FORMAT="$2"
            ;;
          *)
            echo "Error: Invalid format '$2'. Must be one of: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid" >&2
            exit 1
            ;;
        esac
        shift 2
        ;;
      -m|--max-age)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Duration required after $1" >&2
          exit 1
        fi
        MAX_DURATION="$2"
        # The conversion runs in a command substitution, so a failure inside it
        # cannot stop this script by itself; check its status explicitly.
        MAX_DURATION_SECONDS=$(duration_to_seconds "$2") || exit 1
        # Guard against a non-numeric result, which would make the later age
        # comparison meaningless.
        if ! [[ "$MAX_DURATION_SECONDS" =~ ^[0-9]+$ ]]; then
          echo "Error: Invalid duration '$2'" >&2
          exit 1
        fi
        shift 2
        ;;
      -s|--max-snapshots)
        if [[ -z "${2:-}" ]]; then
          echo "Error: Number required after $1" >&2
          exit 1
        fi
        if ! [[ "$2" =~ ^[0-9]+$ ]]; then
          echo "Error: Max snapshots must be a positive integer" >&2
          exit 1
        fi
        # Normalise to base 10 so that a value with leading zeros (e.g. "08")
        # is not rejected as invalid octal in later numeric comparisons.
        MAX_SNAPSHOTS=$((10#$2))
        shift 2
        ;;
      -r|--recursive)
        RECURSIVE=true
        shift
        ;;
      -v|--verbose)
        VERBOSE=true
        shift
        ;;
      -n|--err-for-no-snapshots)
        ERR_FOR_NO_SNAPSHOTS=true
        shift
        ;;
      -h|--help)
        # usage terminates the shell it runs in. Run it in a subshell and exit
        # 0 explicitly: asking for help is not an error.
        (usage 0)
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
if [[ ${#DATASETS[@]} -eq 0 ]]; then
  echo "Error: At least one dataset must be specified" >&2
  usage
fi

# Aggregates filled in while the datasets are processed.
OLDEST_LATEST_EPOCH=0      # epoch of the oldest "most recent" snapshot (0 = none yet)
OLDEST_SNAPSHOT_DATASET="" # dataset that snapshot belongs to
MAX_SNAPSHOT_COUNT=0       # largest per-dataset snapshot count seen
MAX_SNAPSHOT_DATASET=""    # dataset with that count
EXCEEDS_LIMIT=false
HAS_DATASET_WITHOUT_SNAPSHOTS=false

# Collect all datasets to process (with exclusions applied) - compute once
DATASETS_TO_PROCESS=()
MAX_DATASET_NAME_LENGTH=0

#######################################
# Test whether a string is one of the remaining arguments (exact match).
# Arguments: $1 - string to look for; $2... - candidates
# Returns:   0 if found, 1 otherwise
#######################################
_dataset_in_list() {
  local needle="$1"
  local item
  shift
  for item in "$@"; do
    if [[ "$item" == "$needle" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Expand DATASETS into the final list of datasets to process.
# Datasets listed in EXCLUDE_DATASETS are dropped (exact name match), and a
# dataset reached through more than one -d argument (e.g. overlapping trees
# with -r) is listed only once so it is not checked and printed twice.
# Globals:
#   DATASETS, EXCLUDE_DATASETS (read)
#   DATASETS_TO_PROCESS, MAX_DATASET_NAME_LENGTH, DATASET_COLUMN_WIDTH (written)
#######################################
collect_datasets() {
  local base_dataset dataset

  DATASETS_TO_PROCESS=()
  MAX_DATASET_NAME_LENGTH=0

  for base_dataset in "${DATASETS[@]}"; do
    while IFS= read -r dataset; do
      [[ -z "$dataset" ]] && continue
      _dataset_in_list "$dataset" "${EXCLUDE_DATASETS[@]}" && continue
      _dataset_in_list "$dataset" "${DATASETS_TO_PROCESS[@]}" && continue

      DATASETS_TO_PROCESS+=("$dataset")
      if ((${#dataset} > MAX_DATASET_NAME_LENGTH)); then
        MAX_DATASET_NAME_LENGTH=${#dataset}
      fi
    done < <(get_all_datasets "$base_dataset")
  done

  # Width of the first table column: longest name plus two characters of padding.
  DATASET_COLUMN_WIDTH=$((MAX_DATASET_NAME_LENGTH + 2))
}

collect_datasets
|
||||
|
||||
# Print table header
if [[ "$VERBOSE" == "true" ]]; then
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "Dataset" "Snapshots" "Min Snapshot Age"
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$(printf '%*s' "$DATASET_COLUMN_WIDTH" '' | tr ' ' '-')" "---------" "----------------"
fi

# Reference time for all per-dataset ages; taken once so every row of the
# table is measured against the same instant.
NOW_EPOCH=$(date +%s)

#######################################
# Inspect one dataset: count its snapshots carrying PREFIX, determine the age
# of the most recently created one, print the table row in verbose mode and
# update the global aggregates.
# Globals:
#   PREFIX, FORMAT, VERBOSE, MAX_SNAPSHOTS, DATASET_COLUMN_WIDTH,
#   NOW_EPOCH (read)
#   MAX_SNAPSHOT_COUNT, MAX_SNAPSHOT_DATASET, EXCEEDS_LIMIT,
#   HAS_DATASET_WITHOUT_SNAPSHOTS, OLDEST_LATEST_EPOCH,
#   OLDEST_SNAPSHOT_DATASET (written)
# Arguments:
#   $1 - dataset name
# Outputs:
#   Table row on stdout (verbose mode only); warnings/errors on stderr.
#######################################
process_dataset() {
  local dataset="$1"
  local -a snapshots=()
  local line snapshot_count latest_snapshot epoch
  local dataset_age_seconds dataset_age_formatted

  # Snapshots arrive sorted by creation time (oldest first). Keep those whose
  # name contains "@<prefix>"; the pattern is quoted so the prefix is matched
  # literally rather than as a regular expression.
  while IFS= read -r line; do
    if [[ "$line" == *"@${PREFIX}"* ]]; then
      snapshots+=("$line")
    fi
  done < <(zfs list -H -t snapshot -o name -s creation "$dataset" 2>/dev/null)

  snapshot_count=${#snapshots[@]}

  if ((snapshot_count == 0)); then
    HAS_DATASET_WITHOUT_SNAPSHOTS=true
    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "0" "n/a"
    else
      echo "Warning: No snapshots found for dataset $dataset with prefix $PREFIX" >&2
    fi
    return 0
  fi

  if ((snapshot_count > MAX_SNAPSHOT_COUNT)); then
    MAX_SNAPSHOT_COUNT=$snapshot_count
    MAX_SNAPSHOT_DATASET="$dataset"
  fi

  # 10# keeps a limit written with leading zeros from being read as octal.
  if [[ -n "$MAX_SNAPSHOTS" ]] && ((snapshot_count > 10#$MAX_SNAPSHOTS)); then
    EXCEEDS_LIMIT=true
    echo "Error: Dataset $dataset has $snapshot_count snapshots (exceeds limit of $MAX_SNAPSHOTS)" >&2
  fi

  # Last element = most recently created snapshot.
  latest_snapshot="${snapshots[snapshot_count - 1]}"

  if [[ -n "$FORMAT" ]]; then
    epoch=$(parse_snapshot_time "$latest_snapshot" "$FORMAT")
  else
    epoch=$(get_snapshot_creation_time "$latest_snapshot")
  fi

  # Accept only a plain positive integer; anything else (empty output, "-",
  # an error text) is reported instead of being fed into arithmetic.
  if [[ "$epoch" =~ ^[0-9]+$ ]] && ((10#$epoch > 0)); then
    epoch=$((10#$epoch))
    dataset_age_seconds=$((NOW_EPOCH - epoch))
    dataset_age_formatted=$(format_age "$dataset_age_seconds")

    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "$snapshot_count" "$dataset_age_formatted"
    fi

    # Track the dataset whose newest snapshot is the oldest overall.
    if ((OLDEST_LATEST_EPOCH == 0 || epoch < OLDEST_LATEST_EPOCH)); then
      OLDEST_LATEST_EPOCH=$epoch
      OLDEST_SNAPSHOT_DATASET="$dataset"
    fi
  else
    echo "Warning: Could not determine snapshot age for $latest_snapshot" >&2
  fi
}

# Process each dataset from the pre-computed list
for dataset in "${DATASETS_TO_PROCESS[@]}"; do
  process_dataset "$dataset"
done
|
||||
|
||||
# Final report and exit status.
# Reads the aggregates gathered while processing the datasets, prints the
# two summary lines, and exits 1 if any requested check failed, 0 otherwise.

# OLDEST_LATEST_EPOCH is still 0 when no dataset yielded a usable snapshot
# timestamp, in which case there is nothing to report.
if [[ $OLDEST_LATEST_EPOCH -eq 0 ]]; then
  echo "Error: No valid snapshots found for any dataset" >&2
  exit 1
fi

CURRENT_EPOCH=$(date -u +%s)

TIME_DIFF_SECONDS=$((CURRENT_EPOCH - OLDEST_LATEST_EPOCH))
TIME_DIFF_FORMATTED=$(format_age "$TIME_DIFF_SECONDS")

# Separate the summary from the per-dataset table.
if [[ "$VERBOSE" == "true" ]]; then
  echo ""
fi

echo "Max snapshots: $MAX_SNAPSHOT_COUNT ($MAX_SNAPSHOT_DATASET)"
echo "Oldest most recent snapshot age: $TIME_DIFF_FORMATTED ($OLDEST_SNAPSHOT_DATASET)"

EXIT_CODE=0

# The per-dataset error for this case was already printed during processing.
if [[ "$EXCEEDS_LIMIT" == "true" ]]; then
  EXIT_CODE=1
fi

if [[ "$ERR_FOR_NO_SNAPSHOTS" == "true" && "$HAS_DATASET_WITHOUT_SNAPSHOTS" == "true" ]]; then
  # State the reason so that a failing exit status is never silent.
  echo "Error: At least one dataset has no snapshots with prefix $PREFIX" >&2
  EXIT_CODE=1
fi

if [[ -n "$MAX_DURATION" ]]; then
  if [[ $TIME_DIFF_SECONDS -gt $MAX_DURATION_SECONDS ]]; then
    # Report the violation like the snapshot-count limit does, instead of
    # failing without any explanation.
    echo "Error: Most recent snapshot of dataset $OLDEST_SNAPSHOT_DATASET is $TIME_DIFF_FORMATTED old (exceeds limit of $MAX_DURATION)" >&2
    EXIT_CODE=1
  fi
fi

exit "$EXIT_CODE"
|
||||
Loading…
x
Reference in New Issue
Block a user