#!/bin/bash
#
# Train the Hybrid ML Detector on real data.
#
# Usage: train_hybrid_production.sh [DAYS] [MAX_SAMPLES]
#   DAYS         look-back window in days, forwarded to train_hybrid.py
#                as --days (default: 7)
#   MAX_SAMPLES  maximum record count (default: 1000000); currently only
#                shown in the summary, see NOTE(review) below
#
# Database credentials are loaded automatically from /opt/ids/.env
# (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD).
#
# Exit status: 0 when training succeeds, 1 on any precondition or
# training failure.
#

set -e  # Exit on error

echo "======================================================================="
echo " TRAINING HYBRID ML DETECTOR - DATI REALI"
echo "======================================================================="
echo ""

# Paths
IDS_ROOT="/opt/ids"
ENV_FILE="$IDS_ROOT/.env"
PYTHON_ML_DIR="$IDS_ROOT/python_ml"
VENV_PYTHON="$PYTHON_ML_DIR/venv/bin/python"

# The .env file must exist before we try to source it.
if [ ! -f "$ENV_FILE" ]; then
  echo "❌ ERRORE: File .env non trovato in $ENV_FILE" >&2
  exit 1
fi

# Load variables from .env. The file is executed as shell code, so it must
# contain plain VAR=value assignments.
echo "📂 Caricamento credenziali database da .env..."
# shellcheck disable=SC1090 — path is only known at runtime
source "$ENV_FILE"

# Map the libpq-style variables onto local names, with defaults for
# everything except the password.
DB_HOST="${PGHOST:-localhost}"
DB_PORT="${PGPORT:-5432}"
DB_NAME="${PGDATABASE:-ids}"
DB_USER="${PGUSER:-postgres}"
DB_PASSWORD="${PGPASSWORD:-}"

# A password is mandatory; refuse to continue without one.
if [ -z "$DB_PASSWORD" ]; then
  echo "❌ ERRORE: PGPASSWORD non trovata nel file .env" >&2
  echo " Aggiungi: PGPASSWORD=tua_password_qui" >&2
  exit 1
fi

echo "✅ Credenziali caricate:"
echo " Host: $DB_HOST"
echo " Port: $DB_PORT"
echo " Database: $DB_NAME"
echo " User: $DB_USER"
echo " Password: ****** (nascosta)"
echo ""

# Training parameters (positional arguments with defaults).
DAYS="${1:-7}"               # Default: last 7 days
MAX_SAMPLES="${2:-1000000}"  # Default: at most 1M records
# NOTE(review): MAX_SAMPLES is displayed below but is not forwarded to
# train_hybrid.py. Confirm whether the trainer exposes a matching option and
# pass it, or drop the parameter.

echo "🎯 Parametri training:"
echo " Periodo: ultimi $DAYS giorni"
echo " Max records: $MAX_SAMPLES"
echo ""

# The virtual environment interpreter must exist.
if [ ! -f "$VENV_PYTHON" ]; then
  echo "❌ ERRORE: Virtual environment non trovato in $VENV_PYTHON" >&2
  echo " Esegui prima: cd $IDS_ROOT && python3 -m venv python_ml/venv" >&2
  exit 1
fi

echo "🐍 Python: $VENV_PYTHON"
echo ""

# Best-effort overview of the data available in the database.
# The command is tested directly in the `if` so that a failure is NOT fatal
# under `set -e`; with a separate `$?` check the script would abort before
# the warning could ever be printed. psql's stderr is intentionally
# discarded: this step is informational only.
echo "📊 Verifica dati disponibili nel database..."
if ! PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "
SELECT
  TO_CHAR(MIN(timestamp), 'YYYY-MM-DD HH24:MI:SS') as primo_log,
  TO_CHAR(MAX(timestamp), 'YYYY-MM-DD HH24:MI:SS') as ultimo_log,
  EXTRACT(DAY FROM (MAX(timestamp) - MIN(timestamp))) || ' giorni' as periodo_totale,
  TO_CHAR(COUNT(*), 'FM999,999,999') as totale_records
FROM network_logs;
" 2>/dev/null; then
  echo "⚠️ WARNING: Impossibile verificare dati database (continuo comunque...)" >&2
fi

echo ""
echo "🚀 Avvio training..."
echo ""
echo "======================================================================="

# train_hybrid.py is invoked by relative name, so run from its directory.
if ! cd "$PYTHON_ML_DIR"; then
  echo "❌ ERRORE: impossibile accedere a $PYTHON_ML_DIR" >&2
  exit 1
fi

# Run the training. The command is the `if` condition so that the failure
# branch is reachable under `set -e`.
# NOTE(review): --db-password puts the secret on the command line, where it
# is visible to other local users via `ps`. Prefer passing it through the
# environment if train_hybrid.py supports that — confirm against its CLI.
if "$VENV_PYTHON" train_hybrid.py --train --source database \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-name "$DB_NAME" \
  --db-user "$DB_USER" \
  --db-password "$DB_PASSWORD" \
  --days "$DAYS"; then
  echo ""
  echo "======================================================================="
  echo "✅ TRAINING COMPLETATO CON SUCCESSO!"
  echo "======================================================================="
  echo ""
  echo "📁 Modelli salvati in: $PYTHON_ML_DIR/models/"
  echo ""
  echo "🔄 PROSSIMI PASSI:"
  echo " 1. Restart ML backend: sudo systemctl restart ids-ml-backend"
  echo " 2. Verifica caricamento: sudo journalctl -u ids-ml-backend -f"
  echo " 3. Test API: curl http://localhost:8000/health"
  echo ""
else
  echo "" >&2
  echo "=======================================================================" >&2
  echo "❌ ERRORE DURANTE IL TRAINING" >&2
  echo "=======================================================================" >&2
  echo "" >&2
  echo "Controlla i log sopra per dettagli sull'errore." >&2
  exit 1
fi