prtg-smart-health-check/prtg-smart-health-check.v1.sh
2025-03-29 01:38:28 +01:00

136 lines
6.3 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Beschreibung: Überwacht die S.M.A.R.T Werte eines physikalischen Devices und gibt diese als XML in PRTG aus
# Parameter: /dev/sda
# Autor: Patrick Asmus
# Web: https://www.techniverse.net
# Version: 1.1
# Datum: 28.03.2025
# Modifikation: Grenzwerte hinzugefuegt
#####################################################
# Pre-flight checks. Each failure is reported to PRTG as an error document on
# stdout, after which the script ends with exit status 1.
DEVICE="$1"

# Print a PRTG error document carrying the message in $1, then terminate.
abort_with_prtg_error() {
  echo "<prtg><error>1</error><text>$1</text></prtg>"
  exit 1
}

# The device path is the only argument and it is mandatory.
[[ -n "$DEVICE" ]] || abort_with_prtg_error "Kein Gerät angegeben"

# smartctl has to be resolvable before it is called further down.
command -v smartctl >/dev/null 2>&1 || abort_with_prtg_error "smartctl nicht installiert"

# The argument must name an existing block device.
[[ -b "$DEVICE" ]] || abort_with_prtg_error "Gerät $DEVICE nicht gefunden"
# Query the drive once; everything below parses this captured report.
SMART_OUTPUT=$(sudo smartctl -x "$DEVICE")
SMART_STATUS=$?
# smartctl's exit status is a bit mask. Bit 0 (command line did not parse) and
# bit 1 (device open failed) mean there is no usable report; a failing sudo
# also ends with status 1. Continuing in that situation would turn every
# channel into its fallback value 0 and present a broken query as a healthy
# disk, so it is reported as a sensor error instead. The higher bits describe
# the disk's condition and must not stop the report.
if [[ -z "$SMART_OUTPUT" ]] || (( SMART_STATUS & 3 )); then
  echo "<prtg><error>1</error><text>smartctl konnte $DEVICE nicht auslesen</text></prtg>"
  exit 1
fi
# Drive identity (model, serial number, vendor) taken from $SMART_OUTPUT.

# smart_info_field LABEL_REGEX
#   Prints the value of the first "Label: value" line whose label matches
#   LABEL_REGEX (extended regex). The label is anchored at the start of the
#   line and must be followed directly by the colon, so lines that merely
#   contain the word (e.g. "PCI Vendor/Subsystem ID: 0x144d" or
#   "Vendor Specific SMART Attributes with Thresholds:") are not picked up.
#   Whitespace runs are collapsed and the value is trimmed inside awk; the
#   former xargs-based trimming aborted on values containing quotes or
#   apostrophes.
#   Prints nothing when no line matches.
smart_info_field() {
  printf '%s\n' "$SMART_OUTPUT" | awk -v label="$1" '
    BEGIN { pattern = "^(" label "):" }
    !done && $0 ~ pattern {
      sub(/^[^:]*:/, "")     # drop the label including its colon
      gsub(/[ \t]+/, " ")    # collapse whitespace runs
      sub(/^ /, "")
      sub(/ $/, "")
      print
      done = 1
    }'
}

MODEL=$(smart_info_field 'Model Number|Device Model')
SERIAL=$(smart_info_field 'Serial Number')
VENDOR=$(smart_info_field 'Vendor')
# Without a dedicated vendor line the first word of the model string is used.
[[ -n "$VENDOR" ]] || VENDOR=${MODEL%% *}
# Placeholders for anything that could not be determined.
MODEL=${MODEL:-Unbekanntes Modell}
SERIAL=${SERIAL:-Keine Seriennummer}
VENDOR=${VENDOR:-Unbekannter Hersteller}
# Device class: a report containing the text "NVMe Version" is handled as
# NVMe, every other report as SATA.
case "$SMART_OUTPUT" in
  *"NVMe Version"*) TYPE="nvme" ;;
  *)                TYPE="sata" ;;
esac
# Opening tag of the PRTG result document; the type-specific section that
# follows appends its channels to this variable.
XML="<prtg>"
if [ "$TYPE" = "sata" ]; then
# get_value ID
#   Prints the leading integer of the RAW_VALUE of SMART attribute ID, read
#   from the attribute table in $SMART_OUTPUT. Prints nothing when the
#   attribute is absent or its raw value does not start with a digit.
#
#   "smartctl -x" prints the table in the brief layout (8 columns), not the
#   classic 10-column one, so a fixed field number 10 misses the raw value.
#   The column is therefore taken from the table header, where RAW_VALUE is
#   the last column in both layouts. Matching is limited to the rows between
#   that header and the next blank line, because other tables in the -x
#   report (e.g. the SCT temperature history) also start their rows with
#   small integers.
get_value() {
  printf '%s\n' "$SMART_OUTPUT" | awk -v id="$1" '
    # Table header: remember which field holds RAW_VALUE.
    $1 == "ID#" { in_table = 1; raw_col = NF; next }
    # A blank line ends the attribute table.
    in_table && NF == 0 { in_table = 0 }
    in_table && !found && $1 == id {
      # Keep only the leading digits, e.g. "30" out of "30 (Min/Max 20/45)".
      if (match($raw_col, /^[0-9]+/))
        print substr($raw_col, RSTART, RLENGTH)
      found = 1
    }
  '
}
# Raw values of the SMART attributes published as channels, looked up by
# attribute ID through get_value.
TEMP=$(get_value 194)
HOURS=$(get_value 9)
REALLOC=$(get_value 5)
REALLOC_EVENT=$(get_value 196)
PENDING=$(get_value 197)
UNCORRECTABLE=$(get_value 198)
CRC_ERROR=$(get_value 199)
REPORTED_UNCORRECT=$(get_value 187)
# An attribute without a value is published as 0. Indirect expansion
# (${!var}) and printf -v replace the former eval, whose unquoted test
# printed "[: too many arguments" for any value containing whitespace.
for var in TEMP HOURS REALLOC REALLOC_EVENT PENDING UNCORRECTABLE CRC_ERROR REPORTED_UNCORRECT; do
  [[ -n "${!var}" ]] || printf -v "$var" '%s' 0
done
# One <result> per channel; the limit* elements carry the warning and error
# thresholds. The <text> element is the sensor message.
XML+="
<result><channel>Temperature (°C)</channel><value>$TEMP</value><unit>Temperature</unit><limitmode>1</limitmode><limitmaxwarning>45</limitmaxwarning><limitmaxerror>55</limitmaxerror></result>
<result><channel>Power-On Hours</channel><value>$HOURS</value><unit>Hours</unit></result>
<result><channel>Reallocated Sectors</channel><value>$REALLOC</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>10</limitmaxerror></result>
<result><channel>Reallocated Events</channel><value>$REALLOC_EVENT</value><unit>Count</unit></result>
<result><channel>Pending Sectors</channel><value>$PENDING</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>Offline Uncorrectable</channel><value>$UNCORRECTABLE</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>Reported Uncorrect</channel><value>$REPORTED_UNCORRECT</value><unit>Count</unit></result>
<result><channel>CRC Error Count</channel><value>$CRC_ERROR</value><unit>Count</unit></result>
<text>$VENDOR | $MODEL | SN: $SERIAL | $DEVICE</text>"
elif [ "$TYPE" = "nvme" ]; then
# get_nvme_value LABEL_REGEX
#   Finds the first line of $SMART_OUTPUT that starts with LABEL_REGEX
#   (extended regex) followed directly by a colon, and prints the text
#   between that colon and the next one with surrounding spaces and tabs
#   removed. Prints nothing when no line matches.
get_nvme_value() {
  local line_regex="^$1:"
  # The regex travels through the environment so awk receives it verbatim.
  LINE_REGEX="$line_regex" awk -F: '
    $0 ~ ENVIRON["LINE_REGEX"] {
      value = $2
      sub(/^[ \t]+/, "", value)
      sub(/[ \t]+$/, "", value)
      print value
      exit
    }
  ' <<< "$SMART_OUTPUT"
}
# nvme_count LABEL_REGEX
#   Integer value of a health-log line. smartctl groups digits with a
#   locale-dependent thousands separator ("1,234" or "1.234") and appends
#   "%" to percentages, so every non-digit character is removed.
nvme_count() {
  local raw
  raw=$(get_nvme_value "$1")
  printf '%s\n' "${raw//[^0-9]/}"
}

# nvme_data_units_tb LABEL_REGEX
#   Terabytes (two decimals, dot as decimal separator) for a "Data Units ..."
#   line. The figure is computed from the unit count in front of the
#   bracketed summary instead of reading the summary itself, which is only
#   expressed in TB for large volumes and uses the locale's decimal separator.
#   One data unit is 1000 blocks of 512 bytes, i.e. 512000 bytes.
#   Prints nothing when the line is missing.
nvme_data_units_tb() {
  local raw units
  raw=$(get_nvme_value "$1")
  units=${raw%%\[*}
  units=${units//[^0-9]/}
  [[ -n "$units" ]] || return 0
  LC_ALL=C awk -v u="$units" 'BEGIN { printf "%.2f\n", u * 512000 / 1e12 }'
}

TEMP=$(get_nvme_value "Temperature" | awk '{print $1}')
HOURS=$(nvme_count "Power On Hours")
PERCENT_USED=$(nvme_count "Percentage Used")
MEDIA_ERRORS=$(nvme_count "Media and Data Integrity Errors")
ERROR_LOGS=$(nvme_count "Error Information Log Entries")
UNSAFE_SHUTDOWNS=$(nvme_count "Unsafe Shutdowns")
# smartctl pads this label with two spaces ("Warning  Comp. ..."), hence " +".
WARN_TEMP_TIME=$(nvme_count "Warning +Comp. Temperature Time")
CRIT_TEMP_TIME=$(nvme_count "Critical Comp. Temperature Time")
POWER_CYCLES=$(nvme_count "Power Cycles")
WRITTEN_TB=$(nvme_data_units_tb "Data Units Written")
READ_TB=$(nvme_data_units_tb "Data Units Read")

# Missing values are published as 0. This happens before the wear trend is
# computed so that the arithmetic below always sees a number.
for var in TEMP HOURS PERCENT_USED MEDIA_ERRORS ERROR_LOGS UNSAFE_SHUTDOWNS WARN_TEMP_TIME CRIT_TEMP_TIME POWER_CYCLES WRITTEN_TB READ_TB; do
  [[ -n "${!var}" ]] || printf -v "$var" '%s' 0
done

# === Wear level history ===
# The cache file holds "<percentage used>:<date>" of the last recorded change.
CACHE_FILE="/var/prtg/scriptsxml/.smart_wear_${DEVICE//\//_}.cache"
NOW_DATE=$(date '+%Y-%m-%d')
LAST_LEVEL=""
LAST_DATE=""
if [[ -f "$CACHE_FILE" ]]; then
  IFS=: read -r LAST_LEVEL LAST_DATE < "$CACHE_FILE"
fi
if [[ "$LAST_LEVEL" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value with a leading zero is not read as octal.
  DIFF=$(( 10#$PERCENT_USED - 10#$LAST_LEVEL ))
  if (( DIFF != 0 )); then
    # %+d prints the sign itself, so a decrease reads "-3" rather than "+-3".
    printf -v TREND ' %+d %% seit %s' "$DIFF" "$LAST_DATE"
    echo "$PERCENT_USED:$NOW_DATE" > "$CACHE_FILE"
  else
    TREND=" unverändert seit $LAST_DATE"
  fi
else
  # No cache file yet, or its content is unusable: start a new history.
  echo "$PERCENT_USED:$NOW_DATE" > "$CACHE_FILE"
  TREND=" erster Messpunkt"
fi

# One <result> per channel. The two TB channels carry fractional values and
# are therefore flagged with <float>1</float>.
XML+="
<result><channel>Temperature (°C)</channel><value>$TEMP</value><unit>Temperature</unit><limitmode>1</limitmode><limitmaxwarning>65</limitmaxwarning><limitmaxerror>80</limitmaxerror></result>
<result><channel>Power-On Hours</channel><value>$HOURS</value><unit>Hours</unit></result>
<result><channel>Wear Level (Percentage Used)</channel><value>$PERCENT_USED</value><unit>Percent</unit><limitmode>1</limitmode><limitmaxwarning>70</limitmaxwarning><limitmaxerror>90</limitmaxerror></result>
<result><channel>Power Cycles</channel><value>$POWER_CYCLES</value><unit>Count</unit></result>
<result><channel>Media/Data Errors</channel><value>$MEDIA_ERRORS</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>SMART Error Logs</channel><value>$ERROR_LOGS</value><unit>Count</unit><limitmode>1</limitmode><limitmaxwarning>10</limitmaxwarning><limitmaxerror>50</limitmaxerror></result>
<result><channel>Unsafe Shutdowns</channel><value>$UNSAFE_SHUTDOWNS</value><unit>Count</unit></result>
<result><channel>Warning Temp Time</channel><value>$WARN_TEMP_TIME</value><unit>TimeSeconds</unit></result>
<result><channel>Critical Temp Time</channel><value>$CRIT_TEMP_TIME</value><unit>TimeSeconds</unit><limitmode>1</limitmode><limitmaxerror>10</limitmaxerror></result>
<result><channel>Data Written (TB)</channel><value>$WRITTEN_TB</value><float>1</float><unit>Custom</unit></result>
<result><channel>Data Read (TB)</channel><value>$READ_TB</value><float>1</float><unit>Custom</unit></result>
<text>$VENDOR | $MODEL | SN: $SERIAL | $DEVICE | Wear: $PERCENT_USED %$TREND</text>"
else
XML+="<error>1</error><text>Unbekannter Gerätetyp</text>"
fi
# Close the PRTG document and write it to stdout.
XML="${XML}</prtg>"
printf '%s\n' "$XML"