#!/usr/bin/env bash
# Description:  Monitors the S.M.A.R.T. values of a physical device and prints them as XML for PRTG
# Parameter:    /dev/sda
# Author:       Patrick Asmus
# Web:          https://www.techniverse.net
# Version:      1.1
# Date:         28.03.2025
# Modification: Added limit values
#####################################################

# Device to inspect, taken from the first positional argument (e.g. /dev/sda).
DEVICE="$1"

#######################################
# Prints a PRTG error document and terminates the script.
# Arguments: $1 - human-readable error text
# Outputs:   <prtg><error>1</error><text>…</text></prtg> on stdout
# Returns:   never; exits with status 1
#######################################
prtg_fail() {
  local msg
  # The text may contain the caller-supplied device string, so XML
  # metacharacters are escaped to keep the document well-formed.
  msg=$(printf '%s' "$1" \
    | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g')
  printf '<prtg><error>1</error><text>%s</text></prtg>\n' "$msg"
  exit 1
}

# A device argument is mandatory.
[[ -n "$DEVICE" ]] || prtg_fail "Kein Gerät angegeben"

# smartctl must be resolvable in PATH.
command -v smartctl >/dev/null 2>&1 || prtg_fail "smartctl nicht installiert"

# The argument has to name an existing block device.
[[ -b "$DEVICE" ]] || prtg_fail "Gerät $DEVICE nicht gefunden"

# Capture the complete smartctl report once; all later lookups parse this text.
SMART_OUTPUT=$(sudo smartctl -x "$DEVICE")
SMART_RC=$?

# Per the smartctl documentation the exit status is a bit mask: bit 0 means
# the command line did not parse, bit 1 means the device could not be opened.
# A failing sudo (status 1) or a missing binary (status 127) also sets one of
# these bits. Without this check an unreadable disk would be reported with all
# channels at 0, i.e. as healthy.
if [[ -z "$SMART_OUTPUT" ]] || (( SMART_RC & 3 )); then
  echo "<prtg><error>1</error><text>smartctl lieferte keine auswertbaren Daten (Exit-Code $SMART_RC)</text></prtg>"
  exit 1
fi

#######################################
# Prints the value of the first "Label: value" line of the report.
# Globals:   SMART_OUTPUT (read)
# Arguments: $1 - label, or several alternatives separated by '|'
# Outputs:   the value with surrounding whitespace removed and inner
#            whitespace runs collapsed to one blank; nothing if absent
#######################################
smart_field() {
  printf '%s\n' "$SMART_OUTPUT" | awk -v label="$1" '
    $0 ~ ("^(" label "):") {
      sub(/^[^:]*:/, "")
      gsub(/[ \t]+/, " ")
      sub(/^ /, "")
      sub(/ $/, "")
      print
      exit
    }'
}

# Extract manufacturer information.
# The labels are anchored at the start of the line so that "Vendor" does not
# pick up unrelated lines such as "PCI Vendor/Subsystem ID".
MODEL=$(smart_field 'Model Number|Device Model')
SERIAL=$(smart_field 'Serial Number')
VENDOR=$(smart_field 'Vendor')

# Without an explicit vendor line, use the first word of the model string.
[[ -n "$VENDOR" ]] || VENDOR=${MODEL%% *}

MODEL=${MODEL:-Unbekanntes Modell}
SERIAL=${SERIAL:-Keine Seriennummer}
VENDOR=${VENDOR:-Unbekannter Hersteller}

# Detect the device type: a report containing "NVMe Version" is treated as
# NVMe, everything else as SATA.
if [[ "$SMART_OUTPUT" == *"NVMe Version"* ]]; then
  TYPE="nvme"
else
  TYPE="sata"
fi

# ---------------------------------------------------------------------------
# Builds the PRTG XML document from the captured smartctl report.
# Reads:  SMART_OUTPUT, TYPE ("sata" or "nvme"), VENDOR, MODEL, SERIAL, DEVICE
# Writes: the <prtg> document to stdout; for NVMe devices additionally a cache
#         file that stores the last seen wear level and its date.
# ---------------------------------------------------------------------------

#######################################
# Escapes XML metacharacters (&, <, >) in a string.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
xml_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

#######################################
# Prints the raw value of one ATA SMART attribute.
# The RAW_VALUE column is located via the table header ("ID# ..."), because
# smartctl -x prints the brief layout (8 columns) while plain -A prints the
# classic one (10 columns). Only rows between the header and the next blank
# line are considered, so numeric rows of other report sections cannot match.
# Globals:   SMART_OUTPUT (read)
# Arguments: $1 - attribute id (e.g. 194)
# Outputs:   first token of the raw value; nothing if the id is absent
#######################################
get_value() {
  printf '%s\n' "$SMART_OUTPUT" | awk -v id="$1" '
    !in_table && $1 == "ID#" {
      col = 10                      # classic layout, used if no RAW_VALUE header
      for (i = 1; i <= NF; i++) if ($i == "RAW_VALUE") col = i
      in_table = 1
      next
    }
    in_table && NF == 0 { exit }    # blank line ends the attribute table
    in_table && $1 == id { print $col; exit }
  '
}

#######################################
# Prints the leading run of digits of a value, or 0 if there is none.
# Arguments: $1 - raw attribute value (may be empty)
#######################################
leading_int() {
  if [[ $1 =~ ^[0-9]+ ]]; then
    printf '%s\n' "${BASH_REMATCH[0]}"
  else
    printf '0\n'
  fi
}

#######################################
# Prints the value of the first report line that starts with "<label>:".
# Globals:   SMART_OUTPUT (read)
# Arguments: $1 - exact label text (e.g. "Power On Hours")
# Outputs:   the text after the colon, trimmed; nothing if absent
#######################################
get_nvme_value() {
  printf '%s\n' "$SMART_OUTPUT" | awk -v label="$1" '
    index($0, label ":") == 1 {
      value = substr($0, length(label) + 2)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      print value
      exit
    }'
}

#######################################
# Reduces an NVMe log value to a plain integer: drops a trailing "[...]"
# annotation, then every non-digit (thousands separators, "%", unit words).
# Arguments: $1 - value as printed by smartctl (may be empty)
# Outputs:   digits only; 0 if no digit is present
#######################################
nvme_int() {
  local v=${1%%\[*}
  v=${v//[!0-9]/}
  printf '%s\n' "${v:-0}"
}

#######################################
# Converts an NVMe "Data Units" value to terabytes with two decimals.
# One data unit is 1000 * 512 bytes (NVMe SMART log definition).
# Arguments: $1 - value as printed by smartctl, e.g. "1,953,125 [1.00 TB]"
# Outputs:   decimal number using '.' as separator
#######################################
nvme_units_to_tb() {
  local units
  units=$(nvme_int "$1")
  # LC_ALL=C keeps the decimal separator a dot regardless of the locale.
  LC_ALL=C awk -v u="$units" 'BEGIN { printf "%.2f\n", u * 512000 / 1e12 }'
}

XML="<prtg>"

if [[ "$TYPE" == "sata" ]]; then
  # Missing or non-numeric attributes are reported as 0.
  TEMP=$(leading_int "$(get_value 194)")
  HOURS=$(leading_int "$(get_value 9)")
  REALLOC=$(leading_int "$(get_value 5)")
  REALLOC_EVENT=$(leading_int "$(get_value 196)")
  PENDING=$(leading_int "$(get_value 197)")
  UNCORRECTABLE=$(leading_int "$(get_value 198)")
  CRC_ERROR=$(leading_int "$(get_value 199)")
  REPORTED_UNCORRECT=$(leading_int "$(get_value 187)")

  INFO_TEXT=$(xml_escape "$VENDOR | $MODEL | SN: $SERIAL | $DEVICE")

  XML+="
<result><channel>Temperature (°C)</channel><value>$TEMP</value><unit>Temperature</unit><limitmode>1</limitmode><limitmaxwarning>45</limitmaxwarning><limitmaxerror>55</limitmaxerror></result>
<result><channel>Power-On Hours</channel><value>$HOURS</value><unit>Hours</unit></result>
<result><channel>Reallocated Sectors</channel><value>$REALLOC</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>10</limitmaxerror></result>
<result><channel>Reallocated Events</channel><value>$REALLOC_EVENT</value><unit>Count</unit></result>
<result><channel>Pending Sectors</channel><value>$PENDING</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>Offline Uncorrectable</channel><value>$UNCORRECTABLE</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>Reported Uncorrect</channel><value>$REPORTED_UNCORRECT</value><unit>Count</unit></result>
<result><channel>CRC Error Count</channel><value>$CRC_ERROR</value><unit>Count</unit></result>
<text>$INFO_TEXT</text>"

elif [[ "$TYPE" == "nvme" ]]; then
  # Integer counters: smartctl prints thousands separators, which are removed
  # here so that PRTG receives plain integers.
  TEMP=$(nvme_int "$(get_nvme_value "Temperature")")
  HOURS=$(nvme_int "$(get_nvme_value "Power On Hours")")
  PERCENT_USED=$(nvme_int "$(get_nvme_value "Percentage Used")")
  MEDIA_ERRORS=$(nvme_int "$(get_nvme_value "Media and Data Integrity Errors")")
  ERROR_LOGS=$(nvme_int "$(get_nvme_value "Error Information Log Entries")")
  UNSAFE_SHUTDOWNS=$(nvme_int "$(get_nvme_value "Unsafe Shutdowns")")
  WARN_TEMP_TIME=$(nvme_int "$(get_nvme_value "Warning Comp. Temperature Time")")
  CRIT_TEMP_TIME=$(nvme_int "$(get_nvme_value "Critical Comp. Temperature Time")")
  POWER_CYCLES=$(nvme_int "$(get_nvme_value "Power Cycles")")

  # Derived from the unit counter instead of the bracketed text, so the
  # channel also works while smartctl still prints MB or GB.
  WRITTEN_TB=$(nvme_units_to_tb "$(get_nvme_value "Data Units Written")")
  READ_TB=$(nvme_units_to_tb "$(get_nvme_value "Data Units Read")")

  # === Track the wear level over time ===
  # The cache file holds "<level>:<date>" of the last change.
  CACHE_FILE="/var/prtg/scriptsxml/.smart_wear_${DEVICE//\//_}.cache"
  NOW_DATE=$(date '+%Y-%m-%d')
  LAST_LEVEL=""
  LAST_DATE=""
  if [[ -f "$CACHE_FILE" ]]; then
    IFS=: read -r LAST_LEVEL LAST_DATE < "$CACHE_FILE"
  fi

  if [[ "$LAST_LEVEL" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value with a leading zero is not read as octal.
    DIFF=$(( 10#$PERCENT_USED - 10#$LAST_LEVEL ))
    if (( DIFF != 0 )); then
      # %+d yields "+3" or "-3"; a fixed "+" prefix would print "+-3".
      printf -v SIGNED_DIFF '%+d' "$DIFF"
      TREND=" – $SIGNED_DIFF % seit $LAST_DATE"
      echo "$PERCENT_USED:$NOW_DATE" > "$CACHE_FILE"
    else
      TREND=" – unverändert seit $LAST_DATE"
    fi
  else
    # No cache yet, or its content is not a number: start a new series.
    echo "$PERCENT_USED:$NOW_DATE" > "$CACHE_FILE"
    TREND=" – erster Messpunkt"
  fi

  INFO_TEXT=$(xml_escape "$VENDOR | $MODEL | SN: $SERIAL | $DEVICE | Wear: $PERCENT_USED %$TREND")

  XML+="
<result><channel>Temperature (°C)</channel><value>$TEMP</value><unit>Temperature</unit><limitmode>1</limitmode><limitmaxwarning>65</limitmaxwarning><limitmaxerror>80</limitmaxerror></result>
<result><channel>Power-On Hours</channel><value>$HOURS</value><unit>Hours</unit></result>
<result><channel>Wear Level (Percentage Used)</channel><value>$PERCENT_USED</value><unit>Percent</unit><limitmode>1</limitmode><limitmaxwarning>70</limitmaxwarning><limitmaxerror>90</limitmaxerror></result>
<result><channel>Power Cycles</channel><value>$POWER_CYCLES</value><unit>Count</unit></result>
<result><channel>Media/Data Errors</channel><value>$MEDIA_ERRORS</value><unit>Count</unit><limitmode>1</limitmode><limitmaxerror>1</limitmaxerror></result>
<result><channel>SMART Error Logs</channel><value>$ERROR_LOGS</value><unit>Count</unit><limitmode>1</limitmode><limitmaxwarning>10</limitmaxwarning><limitmaxerror>50</limitmaxerror></result>
<result><channel>Unsafe Shutdowns</channel><value>$UNSAFE_SHUTDOWNS</value><unit>Count</unit></result>
<result><channel>Warning Temp Time</channel><value>$WARN_TEMP_TIME</value><unit>TimeSeconds</unit></result>
<result><channel>Critical Temp Time</channel><value>$CRIT_TEMP_TIME</value><unit>TimeSeconds</unit><limitmode>1</limitmode><limitmaxerror>10</limitmaxerror></result>
<result><channel>Data Written (TB)</channel><value>$WRITTEN_TB</value><float>1</float><unit>Custom</unit></result>
<result><channel>Data Read (TB)</channel><value>$READ_TB</value><float>1</float><unit>Custom</unit></result>
<text>$INFO_TEXT</text>"

else
  XML+="<error>1</error><text>Unbekannter Gerätetyp</text>"
fi

XML+="</prtg>"
printf '%s\n' "$XML"