From 5106197dc6b75d3dfaa4ebb5fdf5f898f69105ee Mon Sep 17 00:00:00 2001 From: Philipp Woelfel Date: Sat, 15 Nov 2025 19:56:48 -0700 Subject: [PATCH] Initial version --- LICENSE | 21 +++ README.md | 45 ++++++ checkSnapshots.sh | 378 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 444 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100755 checkSnapshots.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3b44173 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Philipp Woelfel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a219997 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# ZFS Snapshot Checker + +Script to check zfs snapshot counts and age of oldest most recent snapshot. 
Specifically, this script iterates over the specified datasets, determines for each one the age of its most recent snapshot (with a specified prefix), and then computes the maximum of those ages. This can be used to monitor ZFS snapshotting and replication tools such as [zrepl](https://zrepl.github.io) or [sanoid/syncoid](https://github.com/jimsalterjrs/sanoid/wiki). In addition, the script determines the number of snapshots for each specified dataset, as well as the maximum of those numbers. This facilitates monitoring of proper snapshot pruning.


## Examples

```bash
# Basic check
./checkSnapshots.sh -d tank/data

# Recursive with verbose output
./checkSnapshots.sh -d tank/data -r -v

# Provide limits (max 20 snapshots, max age 24h); the script exits with code 1 if a limit is exceeded.
./checkSnapshots.sh -d tank/data -s 20 -m 24h
```

## Options

```text
-d, --dataset               Dataset to check (required, repeat for multiple)
-e, --exclude               Exclude dataset (repeat for multiple)
-p, --prefix                Snapshot prefix (default: zrepl_)
-f, --format                Timestamp format (see "Timestamp Formats" below)
-s, --max-snapshots         Max snapshots per dataset
-m, --max-age               Max age (e.g., 31s, 17m, 24h; units: s, m, h, d)
-r, --recursive             Include sub-datasets
-v, --verbose               Show detailed table
-n, --err-for-no-snapshots  Exit with an error if any dataset has no snapshots
```

## Timestamp Formats (`-f`)

- `zrepl-dense` - `20060102_150405_000`
- `zrepl-human` - `2006-01-02_15:04:05`
- `iso-8601` - `2006-01-02T15:04:05Z`
- `unix-seconds` - Unix epoch
- `sanoid` - default sanoid format

Without this flag, the ZFS creation time of the snapshot is used.
#!/usr/bin/env bash
#
# Checks ZFS datasets for snapshots with a specified prefix and determines the maximum
# number of snapshots any dataset has, as well as the age of the oldest "most recent"
# snapshot among all datasets.
#
# License: MIT License - see LICENSE

# Default values (each can be overridden by the command-line options parsed below)
PREFIX="zrepl_"
DATASETS=()
EXCLUDE_DATASETS=()
MAX_DURATION=""
MAX_DURATION_SECONDS=0
RECURSIVE=false
MAX_SNAPSHOTS=""
VERBOSE=false
ERR_FOR_NO_SNAPSHOTS=false
FORMAT=""

# Print the command-line help on stdout and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default: 1)
usage() {
  local status="${1:-1}"

  cat <<EOF
Usage: $0 -d|--dataset <dataset> [-d|--dataset <dataset> ...] [-p|--prefix <prefix>] [-f|--format <format>] [-m|--max-age <duration>] [-s|--max-snapshots <number>] [-e|--exclude <dataset>] [-r|--recursive] [-v|--verbose] [-n|--err-for-no-snapshots]
  -d, --dataset <dataset>        ZFS dataset to check (can be specified multiple times)
  -e, --exclude <dataset>        Dataset to exclude (can be specified multiple times)
  -p, --prefix <prefix>          Snapshot prefix (default: zrepl_)
  -f, --format <format>          Timestamp format: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid
  -m, --max-age <duration>       Maximum age (e.g., 31s, 17m, 24h, 2d)
  -s, --max-snapshots <number>   Maximum allowed snapshots per dataset
  -r, --recursive                Include all sub-datasets of specified datasets
  -v, --verbose                  Print per-dataset snapshot count and age
  -n, --err-for-no-snapshots     Return error if any dataset has no snapshots
  -h, --help                     Show this help
EOF
  exit "$status"
}

# Convert a duration such as 31s, 17m, 24h or 2d into seconds.
# Arguments: $1 - non-negative integer immediately followed by one unit letter
#                 (s = seconds, m = minutes, h = hours, d = days)
# Outputs:   the number of seconds on stdout
# Exits:     with status 1 (message on stderr) if the number or the unit is
#            invalid. When called inside $(...), only the subshell exits, so
#            the caller must check the substitution's status.
duration_to_seconds() {
  local duration="$1"
  local number="${duration%?}"   # everything except the last character
  local unit="${duration: -1}"   # the last character

  # Reject empty/non-numeric values instead of letting shell arithmetic
  # silently evaluate them as 0 (or echo them back unchanged for 's').
  if [[ ! "$number" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration: $duration" >&2
    exit 1
  fi

  # Force base 10 so values with leading zeros (e.g. 08m) are not read as octal
  number=$((10#$number))

  case "$unit" in
    s) echo "$number" ;;
    m) echo $((number * 60)) ;;
    h) echo $((number * 3600)) ;;
    d) echo $((number * 86400)) ;;
    *) echo "Invalid duration unit: $unit" >&2; exit 1 ;;
  esac
}

# NOTE(review): disabled helper kept from the original; the same conversion is
# performed by the zrepl-dense branch of parse_snapshot_time.
# timestamp_to_epoch() {
#   # Format: YYYYMMDD_HHMMSS_mmm
#   local timestamp="$1"
#   local year="${timestamp:0:4}"
#   local month="${timestamp:4:2}"
#   local day="${timestamp:6:2}"
#   local hour="${timestamp:9:2}"
#   local minute="${timestamp:11:2}"
#   local second="${timestamp:13:2}"

#   # Convert to epoch using date command (assuming UTC)
#   date -u -d "${year}-${month}-${day} ${hour}:${minute}:${second}" +%s 2>/dev/null
# }

# Format an age in seconds as a human-readable string such as "1d 2h 3m 4s".
# Units whose value is zero are omitted; an age of 0 yields "0s".
# Arguments: $1 - age in seconds (integer, may be negative)
# Outputs:   the formatted age on stdout
format_age() {
  local total_seconds="$1"
  local sign=""

  # A snapshot time that lies in the future yields a negative age; format the
  # magnitude and prepend the sign instead of silently dropping units.
  if ((total_seconds < 0)); then
    sign="-"
    total_seconds=$((-total_seconds))
  fi

  local days=$((total_seconds / 86400))
  local hours=$(((total_seconds % 86400) / 3600))
  local minutes=$(((total_seconds % 3600) / 60))
  local seconds=$((total_seconds % 60))

  local result=""
  [[ $days -gt 0 ]] && result="${days}d "
  [[ $hours -gt 0 ]] && result="${result}${hours}h "
  [[ $minutes -gt 0 ]] && result="${result}${minutes}m "
  # Seconds are also printed when nothing else was, so the result is never empty
  [[ $seconds -gt 0 || -z "$result" ]] && result="${result}${seconds}s"

  echo "${sign}${result% }" # Remove trailing space
}

# Parse the timestamp embedded in a snapshot name and convert it to epoch seconds.
# Globals:   PREFIX (read) - stripped from the start of the snapshot name
# Arguments: $1 - full snapshot name (dataset@snapshot)
#            $2 - format: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid
# Outputs:   epoch seconds on stdout; nothing if the name does not match the
#            format or date(1) cannot parse it (its errors are discarded)
parse_snapshot_time() {
  local snapshot_name="$1"
  local format="$2"

  # Extract the part after @ (snapshot name)
  local snap_part="${snapshot_name##*@}"

  # Remove the prefix if present. Quoting "$PREFIX" inside the expansion makes
  # it a literal string, so characters like . + * [ in a prefix are not
  # interpreted as a pattern.
  snap_part="${snap_part#"$PREFIX"}"

  local parsed_date=""

  # Timestamps are interpreted as UTC (date -u); -d is the GNU date option
  case "$format" in
    zrepl-dense)
      # Format: YYYYMMDD_HHMMSS_mmm (the milliseconds are matched but ignored)
      if [[ "$snap_part" =~ ^([0-9]{8})_([0-9]{6})_([0-9]{3})$ ]]; then
        local date_part="${BASH_REMATCH[1]}"
        local time_part="${BASH_REMATCH[2]}"
        parsed_date="${date_part:0:4}-${date_part:4:2}-${date_part:6:2} ${time_part:0:2}:${time_part:2:2}:${time_part:4:2}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;

    zrepl-human)
      # Format: YYYY-MM-DD_HH:MM:SS
      if [[ "$snap_part" =~ ^([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2})$ ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;

    iso-8601)
      # Handed to date(1) unchanged
      date -u -d "$snap_part" +%s 2>/dev/null
      ;;

    unix-seconds)
      # Format: Unix epoch timestamp
      if [[ "$snap_part" =~ ^([0-9]+)$ ]]; then
        echo "${BASH_REMATCH[1]}"
        return 0
      fi
      ;;

    sanoid)
      # Format: YYYY-MM-DD_HH:MM:SS anywhere in the name (the regex is
      # deliberately unanchored, so surrounding text is ignored)
      if [[ "$snap_part" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2}:[0-9]{2}:[0-9]{2}) ]]; then
        parsed_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
        date -u -d "$parsed_date" +%s 2>/dev/null
      fi
      ;;
  esac
}

# Get ZFS creation time of a snapshot.
# Arguments: $1 - full snapshot name
# Outputs:   value of the "creation" property as printed by `zfs get -p`
#            (nothing if zfs fails; its errors are discarded)
get_snapshot_creation_time() {
  local snapshot="$1"
  zfs get -H -p -o value creation "$snapshot" 2>/dev/null
}

# Get all datasets to process for one dataset given on the command line.
# Globals:   RECURSIVE (read)
# Arguments: $1 - dataset name
# Outputs:   one dataset name per line on stdout; with RECURSIVE=true the
#            dataset and its descendants of type filesystem (-r -t filesystem)
# Returns:   1 (with a warning on stderr) if the dataset cannot be listed
get_all_datasets() {
  local dataset="$1"

  if ! zfs list "$dataset" >/dev/null 2>&1; then
    echo "Warning: Dataset $dataset does not exist" >&2
    return 1
  fi

  if [[ "$RECURSIVE" != "true" ]]; then
    echo "$dataset"
    return 0
  fi

  if ! zfs list -H -r -t filesystem -o name "$dataset" 2>/dev/null; then
    echo "Warning: Failed to list sub-datasets for $dataset" >&2
    return 1
  fi
}

# Abort the script unless an option was followed by a non-empty value.
# Arguments: $1 - option as typed, $2 - its value, $3 - what the value is
#            (used in the error message)
require_value() {
  if [[ -z "$2" ]]; then
    echo "Error: $3 required after $1" >&2
    exit 1
  fi
}

# ---- Command-line parsing ----
while [[ $# -gt 0 ]]; do
  case "$1" in
    -d|--dataset)
      require_value "$1" "$2" "Dataset name"
      DATASETS+=("$2")
      shift 2
      ;;
    -e|--exclude)
      require_value "$1" "$2" "Dataset name"
      EXCLUDE_DATASETS+=("$2")
      shift 2
      ;;
    -p|--prefix)
      require_value "$1" "$2" "Prefix"
      PREFIX="$2"
      shift 2
      ;;
    -f|--format)
      require_value "$1" "$2" "Format name"
      case "$2" in
        zrepl-dense|zrepl-human|iso-8601|unix-seconds|sanoid)
          FORMAT="$2"
          ;;
        *)
          echo "Error: Invalid format '$2'. Must be one of: zrepl-dense, zrepl-human, iso-8601, unix-seconds, sanoid" >&2
          exit 1
          ;;
      esac
      shift 2
      ;;
    -m|--max-age)
      require_value "$1" "$2" "Duration"
      MAX_DURATION="$2"
      # duration_to_seconds runs in a subshell here, so its own `exit 1` cannot
      # stop this script; without the explicit check an invalid duration would
      # silently turn into a limit of 0 seconds.
      MAX_DURATION_SECONDS=$(duration_to_seconds "$2") || exit 1
      if [[ ! "$MAX_DURATION_SECONDS" =~ ^[0-9]+$ ]]; then
        echo "Error: Invalid duration '$2'" >&2
        exit 1
      fi
      shift 2
      ;;
    -s|--max-snapshots)
      require_value "$1" "$2" "Number"
      if ! [[ "$2" =~ ^[0-9]+$ ]]; then
        echo "Error: Max snapshots must be a non-negative integer" >&2
        exit 1
      fi
      # Base 10 forced so a value like 08 is not rejected later as bad octal
      MAX_SNAPSHOTS=$((10#$2))
      shift 2
      ;;
    -r|--recursive)
      RECURSIVE=true
      shift
      ;;
    -v|--verbose)
      VERBOSE=true
      shift
      ;;
    -n|--err-for-no-snapshots)
      ERR_FOR_NO_SNAPSHOTS=true
      shift
      ;;
    -h|--help)
      # Help requested explicitly: ask usage for a success status
      usage 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      ;;
  esac
done

if [[ ${#DATASETS[@]} -eq 0 ]]; then
  echo "Error: At least one dataset must be specified" >&2
  usage
fi

# ---- Aggregated results ----
OLDEST_LATEST_EPOCH=0        # smallest "latest snapshot" epoch seen (0 = none yet)
OLDEST_SNAPSHOT_DATASET=""   # dataset that epoch belongs to
MAX_SNAPSHOT_COUNT=0         # largest snapshot count seen
MAX_SNAPSHOT_DATASET=""      # dataset that count belongs to
EXCEEDS_LIMIT=false
HAS_DATASET_WITHOUT_SNAPSHOTS=false

# Collect all datasets to process (with exclusions applied) - compute once
DATASETS_TO_PROCESS=()
MAX_DATASET_NAME_LENGTH=0

for base_dataset in "${DATASETS[@]}"; do
  while IFS= read -r dataset; do
    [[ -z "$dataset" ]] && continue

    # Exclusions are exact name matches; excluding a parent does not exclude
    # its children
    excluded=false
    for exclude_pattern in "${EXCLUDE_DATASETS[@]}"; do
      if [[ "$dataset" == "$exclude_pattern" ]]; then
        excluded=true
        break
      fi
    done

    if [[ "$excluded" == "false" ]]; then
      DATASETS_TO_PROCESS+=("$dataset")
      # Track the longest name to size the first table column
      dataset_length=${#dataset}
      if [[ $dataset_length -gt $MAX_DATASET_NAME_LENGTH ]]; then
        MAX_DATASET_NAME_LENGTH=$dataset_length
      fi
    fi
  done < <(get_all_datasets "$base_dataset")
done

DATASET_COLUMN_WIDTH=$((MAX_DATASET_NAME_LENGTH + 2))

# Print table header
if [[ "$VERBOSE" == "true" ]]; then
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "Dataset" "Snapshots" "Min Snapshot Age"
  # The separator is a run of DATASET_COLUMN_WIDTH spaces turned into dashes
  printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$(printf '%*s' "$DATASET_COLUMN_WIDTH" '' | tr ' ' '-')" "---------" "----------------"
fi

# One reference timestamp for the whole run, so the per-dataset ages and the
# final summary are measured against the same instant
CURRENT_EPOCH=$(date +%s)

# Process each dataset from the pre-computed list
for dataset in "${DATASETS_TO_PROCESS[@]}"; do

  # Snapshots come sorted by creation time (-s creation), so the last line is
  # the most recent one. grep -F matches the prefix literally rather than as a
  # regular expression; `|| true` because "no match" is not an error here.
  snapshots=$(zfs list -H -t snapshot -o name -s creation "$dataset" 2>/dev/null | grep -F -- "@${PREFIX}" || true)

  if [[ -z "$snapshots" ]]; then
    HAS_DATASET_WITHOUT_SNAPSHOTS=true
    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "0" "n/a"
    else
      echo "Warning: No snapshots found for dataset $dataset with prefix $PREFIX" >&2
    fi
    continue
  fi

  snapshot_count=$(printf '%s\n' "$snapshots" | wc -l)
  snapshot_count=$((snapshot_count)) # drop the padding some wc implementations emit

  if [[ $snapshot_count -gt $MAX_SNAPSHOT_COUNT ]]; then
    MAX_SNAPSHOT_COUNT=$snapshot_count
    MAX_SNAPSHOT_DATASET="$dataset"
  fi

  if [[ -n "$MAX_SNAPSHOTS" && $snapshot_count -gt $MAX_SNAPSHOTS ]]; then
    EXCEEDS_LIMIT=true
    echo "Error: Dataset $dataset has $snapshot_count snapshots (exceeds limit of $MAX_SNAPSHOTS)" >&2
  fi

  # Last line of the list = most recent snapshot
  latest_snapshot="${snapshots##*$'\n'}"

  # With -f the time is parsed from the snapshot name, otherwise the ZFS
  # creation property is used
  if [[ -n "$FORMAT" ]]; then
    epoch=$(parse_snapshot_time "$latest_snapshot" "$FORMAT")
  else
    epoch=$(get_snapshot_creation_time "$latest_snapshot")
  fi

  # Accept only plain digits, read as base 10 so leading zeros are harmless
  if [[ "$epoch" =~ ^[0-9]+$ ]] && ((10#$epoch > 0)); then
    epoch=$((10#$epoch))
    dataset_age_seconds=$((CURRENT_EPOCH - epoch))
    dataset_age_formatted=$(format_age "$dataset_age_seconds")

    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "$snapshot_count" "$dataset_age_formatted"
    fi

    # Keep the dataset whose newest snapshot is the oldest overall
    if [[ $OLDEST_LATEST_EPOCH -eq 0 || $epoch -lt $OLDEST_LATEST_EPOCH ]]; then
      OLDEST_LATEST_EPOCH=$epoch
      OLDEST_SNAPSHOT_DATASET="$dataset"
    fi
  else
    # Still show the dataset in the table so it is not silently omitted
    if [[ "$VERBOSE" == "true" ]]; then
      printf "%-${DATASET_COLUMN_WIDTH}s %10s %20s\n" "$dataset" "$snapshot_count" "n/a"
    fi
    echo "Warning: Could not determine snapshot age for $latest_snapshot" >&2
  fi
done

if [[ $OLDEST_LATEST_EPOCH -eq 0 ]]; then
  echo "Error: No valid snapshots found for any dataset" >&2
  exit 1
fi
# ---- Summary and exit status ----
# Reads the results gathered above: CURRENT_EPOCH, OLDEST_LATEST_EPOCH,
# OLDEST_SNAPSHOT_DATASET, MAX_SNAPSHOT_COUNT, MAX_SNAPSHOT_DATASET and the
# EXCEEDS_LIMIT / HAS_DATASET_WITHOUT_SNAPSHOTS flags.

# Time elapsed since the oldest "most recent" snapshot of any dataset
TIME_DIFF_SECONDS=$((CURRENT_EPOCH - OLDEST_LATEST_EPOCH))
TIME_DIFF_FORMATTED=$(format_age "$TIME_DIFF_SECONDS")

# Blank line between the verbose table and the summary
if [[ "$VERBOSE" == "true" ]]; then
  echo ""
fi

echo "Max snapshots: $MAX_SNAPSHOT_COUNT ($MAX_SNAPSHOT_DATASET)"
echo "Oldest most recent snapshot age: $TIME_DIFF_FORMATTED ($OLDEST_SNAPSHOT_DATASET)"

# Exit status is 1 as soon as any enabled check fails, 0 otherwise
EXIT_CODE=0

# The per-dataset error for this case was already printed when it was detected
if [[ "$EXCEEDS_LIMIT" == "true" ]]; then
  EXIT_CODE=1
fi

if [[ "$ERR_FOR_NO_SNAPSHOTS" == "true" && "$HAS_DATASET_WITHOUT_SNAPSHOTS" == "true" ]]; then
  # Say why the run fails; in verbose mode no per-dataset warning was printed
  echo "Error: At least one dataset has no snapshots with prefix $PREFIX" >&2
  EXIT_CODE=1
fi

if [[ -n "$MAX_DURATION" ]]; then
  if [[ $TIME_DIFF_SECONDS -gt $MAX_DURATION_SECONDS ]]; then
    # Report the violated limit, like the max-snapshots check does
    echo "Error: Most recent snapshot of $OLDEST_SNAPSHOT_DATASET is $TIME_DIFF_FORMATTED old (exceeds limit of $MAX_DURATION)" >&2
    EXIT_CODE=1
  fi
fi

exit "$EXIT_CODE"