#!/bin/bash
###############################################################################
# Syslog Parser Health Check Script
# Verifies that the parser is processing logs regularly.
# Usage: ./check_parser_health.sh
# Cron:  */5 * * * * /opt/ids/deployment/check_parser_health.sh
#
# Checks performed, in order:
#   1. The ids-syslog-parser systemd unit is active. If not, a restart is
#      attempted and the script exits 1.
#   2. The newest row in network_logs is at most ALERT_THRESHOLD_MINUTES old.
#      If it is older, the parser is restarted. If the age cannot be read
#      (empty result), a warning is logged and the script exits 0.
#   3. The unit logged more than ERROR_THRESHOLD "[ERROR]" journal lines in the
#      last 5 minutes -> a warning plus the last 5 such lines are logged.
#
# Everything is appended to LOG_FILE, which is trimmed to its last 1000 lines
# whenever the script exits.
#
# Environment: PGUSER and PGDATABASE are passed to psql; they are exported from
# /opt/ids/.env when that file exists.
# NOTE(review): psql credentials are presumably supplied by that same file
# (e.g. PGPASSWORD) or by ~/.pgpass -- confirm.
###############################################################################

set -e

# Load environment
if [ -f /opt/ids/.env ]; then
  # Word splitting is intentional here: each KEY=VALUE token produced by xargs
  # becomes one argument to export. Values containing whitespace are therefore
  # not supported by this loader.
  # shellcheck disable=SC2046
  export $(grep -v '^#' /opt/ids/.env | xargs)
fi

readonly SERVICE="ids-syslog-parser"
readonly SYSLOG_FILE="/var/log/mikrotik/raw.log"
readonly ALERT_THRESHOLD_MINUTES=5
readonly ERROR_THRESHOLD=10
readonly LOG_FILE="/var/log/ids/parser-health.log"

mkdir -p /var/log/ids
touch "$LOG_FILE"

# Append one timestamped line to LOG_FILE.
log_line() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

# Append one line to LOG_FILE without a timestamp.
log_plain() {
  printf '%s\n' "$*" >> "$LOG_FILE"
}

# Keep only the last 1000 lines of LOG_FILE. Every step is guarded so that a
# failure here can never change the script's exit status.
trim_log() {
  local tmp="${LOG_FILE}.tmp"
  if tail -n 1000 "$LOG_FILE" > "$tmp" 2>/dev/null; then
    mv -f "$tmp" "$LOG_FILE" || true
  else
    rm -f "$tmp" || true
  fi
}

# Restart the parser unit and log the outcome.
# Arguments: $1 - message to log when the restart succeeds
# Returns:   systemctl's exit status
restart_parser() {
  local ok_msg=$1
  local rc=0
  systemctl restart "$SERVICE" || rc=$?
  if [ "$rc" -eq 0 ]; then
    log_line "$ok_msg"
  else
    log_line "❌ CRITICAL: restart of $SERVICE failed (exit code $rc)"
  fi
  return "$rc"
}

# Trim on every exit path, including the early exits below, so the log cannot
# grow without bound while the service is down or the database is empty.
trap trim_log EXIT

log_line "=== Health Check Start ==="

# Check 1: Service running?
if ! systemctl is-active --quiet "$SERVICE"; then
  log_line "❌ CRITICAL: Parser service NOT running!"
  log_plain "Attempting automatic restart..."
  # The outcome is logged by restart_parser; this path exits 1 either way.
  restart_parser "Service restarted" || true
  exit 1
fi

# Check 2: Recent logs in database?
# -t prints tuples only; all whitespace is stripped so that an empty result
# (no rows, or psql failed) collapses to the empty string.
LAST_LOG_AGE=$(psql -h 127.0.0.1 -U "$PGUSER" -d "$PGDATABASE" -t -c \
  "SELECT EXTRACT(EPOCH FROM (NOW() - MAX(timestamp)))/60 AS minutes_ago FROM network_logs;" \
  | tr -d '[:space:]')

if [ -z "$LAST_LOG_AGE" ]; then
  log_line "⚠️ WARNING: Cannot determine last log age (empty database?)"
  exit 0
fi

# Convert to integer (bash doesn't handle floats): drop the fractional part.
LAST_LOG_AGE_INT=${LAST_LOG_AGE%%.*}

if ! [[ "$LAST_LOG_AGE_INT" =~ ^-?[0-9]+$ ]]; then
  # Without this guard the numeric test below would error out and the script
  # would fall through to reporting "OK" for a value it could not parse.
  log_line "⚠️ WARNING: Unexpected last log age value: '$LAST_LOG_AGE'"
elif [ "$LAST_LOG_AGE_INT" -gt "$ALERT_THRESHOLD_MINUTES" ]; then
  log_line "❌ ALERT: Last log is $LAST_LOG_AGE_INT minutes old (threshold: $ALERT_THRESHOLD_MINUTES min)"
  log_plain "Checking syslog file..."

  # Check if syslog file has new data
  if [ -f "$SYSLOG_FILE" ]; then
    # GNU stat first (this script relies on systemd), BSD form as a fallback.
    # The size is informational only, so never abort if both forms fail.
    SYSLOG_SIZE=$(stat -c%s "$SYSLOG_FILE" 2>/dev/null \
      || stat -f%z "$SYSLOG_FILE" 2>/dev/null \
      || echo "unknown")
    log_plain "Syslog file size: $SYSLOG_SIZE bytes"

    # Restart parser
    log_plain "Restarting parser service..."
    restart_parser "Parser service restarted" || exit "$?"
  else
    log_plain "⚠️ Syslog file not found: $SYSLOG_FILE"
  fi
else
  log_line "✅ OK: Last log ${LAST_LOG_AGE_INT} minutes ago"
fi

# Check 3: Parser errors?
# grep -c always prints a count but exits 1 when the count is 0. Adding an
# `|| echo 0` fallback would emit a second "0" and corrupt the value, so only
# the exit status is neutralised here.
ERROR_COUNT=$(journalctl -u "$SERVICE" --since "5 minutes ago" | grep -c "\[ERROR\]" || true)
ERROR_COUNT=${ERROR_COUNT:-0}

if [ "$ERROR_COUNT" -gt "$ERROR_THRESHOLD" ]; then
  log_line "⚠️ WARNING: $ERROR_COUNT errors in last 5 minutes"
  journalctl -u "$SERVICE" --since "5 minutes ago" | grep "\[ERROR\]" | tail -n 5 >> "$LOG_FILE"
fi

log_line "=== Health Check Complete ==="
log_plain ""

# Log trimming happens in the EXIT trap installed above.
exit 0