#!/bin/bash
#
# Train the Hybrid ML Detector on real data.
# Database credentials are read automatically from /opt/ids/.env.
#
# Usage: <this script> [DAYS] [MAX_SAMPLES]
#   DAYS         Look-back window in days forwarded to the trainer (default: 7).
#   MAX_SAMPLES  Maximum number of records (default: 1000000).
#
# Variables read from .env (with fallbacks): PGHOST, PGPORT, PGDATABASE,
# PGUSER. PGPASSWORD is mandatory.
#

set -e  # Exit on error (commands tested directly by `if` are exempt).

echo "======================================================================="
echo " TRAINING HYBRID ML DETECTOR - DATI REALI"
echo "======================================================================="
echo ""

# Paths
IDS_ROOT="/opt/ids"
ENV_FILE="$IDS_ROOT/.env"
PYTHON_ML_DIR="$IDS_ROOT/python_ml"
VENV_PYTHON="$PYTHON_ML_DIR/venv/bin/python"

# The .env file must exist before we try to source it.
if [[ ! -f "$ENV_FILE" ]]; then
  echo "❌ ERRORE: File .env non trovato in $ENV_FILE"
  exit 1
fi

# Load variables from .env. The file is executed as shell code by `source`.
echo "📂 Caricamento credenziali database da .env..."
# shellcheck disable=SC1090 — path is only known at runtime
source "$ENV_FILE"

# Extract database credentials, falling back to defaults where sensible.
DB_HOST="${PGHOST:-localhost}"
DB_PORT="${PGPORT:-5432}"
DB_NAME="${PGDATABASE:-ids}"
DB_USER="${PGUSER:-postgres}"
DB_PASSWORD="${PGPASSWORD}"

# The password has no default: refuse to continue without it.
if [[ -z "$DB_PASSWORD" ]]; then
  echo "❌ ERRORE: PGPASSWORD non trovata nel file .env"
  echo " Aggiungi: PGPASSWORD=tua_password_qui"
  exit 1
fi

echo "✅ Credenziali caricate:"
echo " Host: $DB_HOST"
echo " Port: $DB_PORT"
echo " Database: $DB_NAME"
echo " User: $DB_USER"
echo " Password: ****** (nascosta)"
echo ""

# Training parameters (positional arguments with defaults).
DAYS="${1:-7}"                # Default: 7 days
MAX_SAMPLES="${2:-1000000}"   # Default: 1M records max
# NOTE(review): MAX_SAMPLES is displayed below but is not forwarded to
# train_hybrid.py anywhere in this script — confirm whether the trainer
# accepts a matching option and wire it through if so.

echo "🎯 Parametri training:"
echo " Periodo: ultimi $DAYS giorni"
echo " Max records: $MAX_SAMPLES"
echo ""

# The virtualenv interpreter must be present.
if [[ ! -f "$VENV_PYTHON" ]]; then
  echo "❌ ERRORE: Virtual environment non trovato in $VENV_PYTHON"
  echo " Esegui prima: cd $IDS_ROOT && python3 -m venv python_ml/venv"
  exit 1
fi

echo "🐍 Python: $VENV_PYTHON"
echo ""

# Summarise the data available in the database (best effort).
echo "📊 Verifica dati disponibili nel database..."

# Built by concatenation so the text handed to psql stays a single line.
stats_query=" SELECT TO_CHAR(MIN(timestamp), 'YYYY-MM-DD HH24:MI:SS') as primo_log,"
stats_query+=" TO_CHAR(MAX(timestamp), 'YYYY-MM-DD HH24:MI:SS') as ultimo_log,"
stats_query+=" EXTRACT(DAY FROM (MAX(timestamp) - MIN(timestamp))) || ' giorni' as periodo_totale,"
stats_query+=" TO_CHAR(COUNT(*), 'FM999,999,999') as totale_records"
stats_query+=" FROM network_logs; "

# psql is tested directly in the `if` so that a failure produces the warning
# and the script continues; with `set -e`, checking `$?` on a following line
# would never be reached. stderr is discarded on purpose: this step is
# informational only.
if ! PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" \
    -U "$DB_USER" -d "$DB_NAME" -c "$stats_query" 2>/dev/null; then
  echo "⚠️ WARNING: Impossibile verificare dati database (continuo comunque...)"
fi

echo ""
echo "🚀 Avvio training..."
echo ""
echo "======================================================================="

# train_hybrid.py is invoked by relative name, so run from its directory.
# A failing cd aborts the script via `set -e`.
cd "$PYTHON_ML_DIR"

# Run the training. The command is the `if` condition so that a non-zero exit
# reaches the error banner below instead of being cut short by `set -e`.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; changing that requires support in
# train_hybrid.py, which is not visible from here.
if "$VENV_PYTHON" train_hybrid.py --train --source database \
    --db-host "$DB_HOST" \
    --db-port "$DB_PORT" \
    --db-name "$DB_NAME" \
    --db-user "$DB_USER" \
    --db-password "$DB_PASSWORD" \
    --days "$DAYS"; then
  echo ""
  echo "======================================================================="
  echo "✅ TRAINING COMPLETATO CON SUCCESSO!"
  echo "======================================================================="
  echo ""
  echo "📁 Modelli salvati in: $PYTHON_ML_DIR/models/"
  echo ""
  echo "🔄 PROSSIMI PASSI:"
  echo " 1. Restart ML backend: sudo systemctl restart ids-ml-backend"
  echo " 2. Verifica caricamento: sudo journalctl -u ids-ml-backend -f"
  echo " 3. Test API: curl http://localhost:8000/health"
  echo ""
else
  echo ""
  echo "======================================================================="
  echo "❌ ERRORE DURANTE IL TRAINING"
  echo "======================================================================="
  echo ""
  echo "Controlla i log sopra per dettagli sull'errore."
  exit 1
fi