Enhance the syslog parser with auto-reconnect, error recovery, and integrated health metrics logging. Add a cron job for automated health checks and restarts. Replit-Commit-Author: Agent Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528 Replit-Commit-Checkpoint-Type: full_checkpoint Replit-Commit-Event-Id: 4885eae4-ffc7-4601-8f1c-5414922d5350 Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/AXTUZmH
81 lines
3.1 KiB
Bash
Executable File
81 lines
3.1 KiB
Bash
Executable File
#!/bin/bash
###############################################################################
# Syslog Parser Health Check Script
# Verifies that the parser is processing logs regularly
# Usage: ./check_parser_health.sh
# Cron: */5 * * * * /opt/ids/deployment/check_parser_health.sh
###############################################################################

set -e

#######################################
# Export KEY=VALUE pairs from a dotenv-style file into the environment.
#
# Each line is parsed on its own, so values are never word-split or
# glob-expanded (a value such as "p@ss word*" survives intact). Blank lines,
# comment lines starting with '#', lines without '=', and lines whose key is
# not a valid shell identifier are skipped. An optional leading "export " and
# one pair of matching surrounding quotes around the value are removed.
#
# Arguments: $1 - path to the env file
# Returns:   0 always (a missing file is not an error)
#######################################
load_env_file() {
  local env_path=$1
  local line key value

  [ -f "$env_path" ] || return 0

  while IFS= read -r line || [ -n "$line" ]; do
    # Trim leading and trailing whitespace (this also drops a CR from CRLF files).
    line=${line#"${line%%[![:space:]]*}"}
    line=${line%"${line##*[![:space:]]}"}

    case "$line" in
      '' | '#'*) continue ;;
    esac

    line=${line#export }

    case "$line" in
      *=*) ;;
      *) continue ;;
    esac

    key=${line%%=*}
    value=${line#*=}

    # Skip invalid names: `export` would fail on them and, with `set -e`
    # active, take the whole script down.
    if ! [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      continue
    fi

    case "$value" in
      \"*\" | \'*\') value=${value:1:${#value}-2} ;;
    esac

    export "$key=$value"
  done < "$env_path"

  return 0
}

# Load environment
load_env_file /opt/ids/.env

# Settings: where this script writes its own log, and how many minutes old
# the newest database row may be before the freshness check raises an alert.
LOG_FILE="/var/log/ids/parser-health.log"
ALERT_THRESHOLD_MINUTES=5

# Make sure the log directory and the log file exist before appending to them.
mkdir -p /var/log/ids
touch "$LOG_FILE"

# Opening banner for this run.
printf '[%s] === Health Check Start ===\n' "$(date '+%Y-%m-%d %H:%M:%S')" >> "$LOG_FILE"

# Check 1: Service running?
# When the ids-syslog-parser unit is not active: log it, attempt one restart,
# record whether the restart worked, and exit 1 so the run is reported as
# failed either way.
if ! systemctl is-active --quiet ids-syslog-parser; then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ❌ CRITICAL: Parser service NOT running!" >> "$LOG_FILE"
  echo "Attempting automatic restart..." >> "$LOG_FILE"

  # Test the restart explicitly: with `set -e` active, a bare failing
  # `systemctl restart` would abort the script before any outcome is logged.
  if systemctl restart ids-syslog-parser; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Service restarted" >> "$LOG_FILE"
  else
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ❌ CRITICAL: Automatic restart FAILED" >> "$LOG_FILE"
  fi

  exit 1
fi

# Check 2: Recent logs in database?
# Ask PostgreSQL how many minutes ago the newest network_logs row was written.
# -A (unaligned) and -t (tuples only) make psql print just the bare value.
# psql's exit status is checked directly so that a failed query is reported as
# such instead of being mistaken for an empty table. Both cases end the run
# with exit 0.
if ! LAST_LOG_AGE=$(psql -h 127.0.0.1 -U "$PGUSER" -d "$PGDATABASE" -At -c \
  "SELECT EXTRACT(EPOCH FROM (NOW() - MAX(timestamp)))/60 AS minutes_ago FROM network_logs;"); then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ⚠️ WARNING: Cannot determine last log age (database query failed)" >> "$LOG_FILE"
  exit 0
fi

# Drop any stray whitespace around the value.
LAST_LOG_AGE=${LAST_LOG_AGE//[[:space:]]/}

# MAX(timestamp) is NULL on an empty table, which psql prints as an empty line.
if [ -z "$LAST_LOG_AGE" ]; then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ⚠️ WARNING: Cannot determine last log age (empty database?)" >> "$LOG_FILE"
  exit 0
fi

#######################################
# Truncate a decimal number to a whole number (bash cannot compare floats).
# Accepts plain decimals ("12.9", ".5") and exponent notation ("9e-05").
# Arguments: $1 - numeric string
# Outputs:   the truncated integer on stdout
# Returns:   0 on success, 1 if $1 is empty or not a number
#######################################
whole_minutes() {
  local raw=$1
  local numeric_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'

  [[ "$raw" =~ $numeric_re ]] || return 1
  # awk does the float-to-integer truncation; the value is passed with -v,
  # never interpolated into the program text.
  awk -v v="$raw" 'BEGIN { printf "%d\n", v }'
}

# Compare the age of the newest database row with the alert threshold.
# If it is too old and the raw syslog file exists, restart the parser.
if ! LAST_LOG_AGE_INT=$(whole_minutes "$LAST_LOG_AGE"); then
  # A non-numeric value would make the integer comparison below error out
  # and fall through to the "OK" branch, so report it explicitly instead.
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ⚠️ WARNING: Unexpected last log age value: '$LAST_LOG_AGE'" >> "$LOG_FILE"
elif [ "$LAST_LOG_AGE_INT" -gt "$ALERT_THRESHOLD_MINUTES" ]; then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ❌ ALERT: Last log is $LAST_LOG_AGE_INT minutes old (threshold: $ALERT_THRESHOLD_MINUTES min)" >> "$LOG_FILE"
  echo "Checking syslog file..." >> "$LOG_FILE"

  # Check if syslog file has new data
  if [ -f "/var/log/mikrotik/raw.log" ]; then
    # GNU stat first, BSD stat as fallback; never let a stat failure abort
    # the script under `set -e`.
    SYSLOG_SIZE=$(stat -c%s "/var/log/mikrotik/raw.log" 2>/dev/null \
      || stat -f%z "/var/log/mikrotik/raw.log" 2>/dev/null \
      || echo "unknown")
    echo "Syslog file size: $SYSLOG_SIZE bytes" >> "$LOG_FILE"

    # Restart parser. The result is tested explicitly so that a failed
    # restart is logged instead of silently killing the script via `set -e`.
    echo "Restarting parser service..." >> "$LOG_FILE"
    if systemctl restart ids-syslog-parser; then
      echo "[$(date '+%Y-%m-%d %H:%M:%S')] Parser service restarted" >> "$LOG_FILE"
    else
      echo "[$(date '+%Y-%m-%d %H:%M:%S')] ❌ CRITICAL: Parser service restart FAILED" >> "$LOG_FILE"
    fi
  else
    echo "⚠️ Syslog file not found: /var/log/mikrotik/raw.log" >> "$LOG_FILE"
  fi
else
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ✅ OK: Last log ${LAST_LOG_AGE_INT} minutes ago" >> "$LOG_FILE"
fi

# Check 3: Parser errors?
# Count "[ERROR]" lines the parser unit logged to the journal in the last
# 5 minutes. `grep -c` already prints "0" when nothing matches but exits 1;
# `|| true` only neutralises that exit status. (`|| echo "0"` would append a
# second "0", giving the two-line value "0\n0" and breaking the integer test.)
ERROR_COUNT=$(journalctl -u ids-syslog-parser --since "5 minutes ago" | grep -c "\[ERROR\]" || true)
ERROR_COUNT=${ERROR_COUNT:-0}

# More than 10 errors: log a warning plus the 5 most recent error lines.
if [ "$ERROR_COUNT" -gt 10 ]; then
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] ⚠️ WARNING: $ERROR_COUNT errors in last 5 minutes" >> "$LOG_FILE"
  journalctl -u ids-syslog-parser --since "5 minutes ago" | grep "\[ERROR\]" | tail -5 >> "$LOG_FILE"
fi

# Close this run with a footer line followed by a blank separator line.
{
  printf '[%s] === Health Check Complete ===\n' "$(date '+%Y-%m-%d %H:%M:%S')"
  printf '\n'
} >> "$LOG_FILE"

# Keep only the most recent 1000 lines: write them to a sibling temp file,
# then move it over the log.
trimmed_log="${LOG_FILE}.tmp"
tail -n 1000 "$LOG_FILE" > "$trimmed_log"
mv "$trimmed_log" "$LOG_FILE"

exit 0