Add script to train hybrid ML detector with real data

Create a bash script to automate the training of the hybrid ML detector, automatically fetching database credentials from the .env file and executing the training process.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: a3c383a4-4a2c-4598-b060-f46984980561
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/2lUhxO2
This commit is contained in:
marco370 2025-11-24 18:05:04 +00:00
parent 34bd6eb8b8
commit 04136e4303

View File

@ -0,0 +1,125 @@
#!/bin/bash
#
# Train the hybrid ML detector on real data.
# Database credentials are read automatically from /opt/ids/.env.
#

# Abort as soon as a command fails outside of a conditional context.
set -o errexit

# Opening banner followed by one empty line.
printf '%s\n' \
  "=======================================================================" \
  " TRAINING HYBRID ML DETECTOR - DATI REALI" \
  "=======================================================================" \
  ""
# Filesystem layout of the IDS installation.
IDS_ROOT="/opt/ids"
ENV_FILE="${IDS_ROOT}/.env"
PYTHON_ML_DIR="${IDS_ROOT}/python_ml"
VENV_PYTHON="${PYTHON_ML_DIR}/venv/bin/python"

# The .env file is mandatory: stop right away when it is missing.
[[ -f "${ENV_FILE}" ]] || {
  printf '%s\n' "❌ ERRORE: File .env non trovato in $ENV_FILE"
  exit 1
}

# Pull the variables defined in .env into the current shell.
printf '%s\n' "📂 Caricamento credenziali database da .env..."
. "${ENV_FILE}"

# Map the PG* variables onto the names used by the rest of the script.
# Every setting except the password falls back to a default value.
DB_HOST="${PGHOST:-localhost}"
DB_PORT="${PGPORT:-5432}"
DB_NAME="${PGDATABASE:-ids}"
DB_USER="${PGUSER:-postgres}"
DB_PASSWORD="${PGPASSWORD}"

# There is no default for the password: an empty value is a fatal error.
if [[ -z "${DB_PASSWORD}" ]]; then
  printf '%s\n' \
    "❌ ERRORE: PGPASSWORD non trovata nel file .env" \
    " Aggiungi: PGPASSWORD=tua_password_qui"
  exit 1
fi

# Summary of the connection settings; the password itself is never printed.
printf '%s\n' \
  "✅ Credenziali caricate:" \
  " Host: $DB_HOST" \
  " Port: $DB_PORT" \
  " Database: $DB_NAME" \
  " User: $DB_USER" \
  " Password: ****** (nascosta)" \
  ""
# Training parameters, taken from the positional arguments:
#   $1 - number of most recent days to consider (default: 7)
#   $2 - maximum number of records (default: 1000000)
DAYS="${1:-7}"
MAX_SAMPLES="${2:-1000000}"
printf '%s\n' \
  "🎯 Parametri training:" \
  " Periodo: ultimi $DAYS giorni" \
  " Max records: $MAX_SAMPLES" \
  ""

# The Python interpreter of the virtual environment must be present.
[[ -f "${VENV_PYTHON}" ]] || {
  printf '%s\n' \
    "❌ ERRORE: Virtual environment non trovato in $VENV_PYTHON" \
    " Esegui prima: cd $IDS_ROOT && python3 -m venv python_ml/venv"
  exit 1
}
printf '%s\n' "🐍 Python: $VENV_PYTHON" ""
# Best-effort pre-check: show the first/last timestamp, the covered period
# and the row count of the network_logs table.
echo "📊 Verifica dati disponibili nel database..."
# psql is run as the condition of the `if` on purpose. The script enables
# `set -e`, so a failing psql executed as a plain statement would abort the
# whole script before a later `$?` test could run, and the "continue anyway"
# warning below would never be printed. Inside an `if` condition the failure
# is not subject to errexit.
# stderr is discarded deliberately: on failure the only feedback is the
# warning, and the training is still attempted.
if ! PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "
SELECT
TO_CHAR(MIN(timestamp), 'YYYY-MM-DD HH24:MI:SS') as primo_log,
TO_CHAR(MAX(timestamp), 'YYYY-MM-DD HH24:MI:SS') as ultimo_log,
EXTRACT(DAY FROM (MAX(timestamp) - MIN(timestamp))) || ' giorni' as periodo_totale,
TO_CHAR(COUNT(*), 'FM999,999,999') as totale_records
FROM network_logs;
" 2>/dev/null; then
  echo "⚠️ WARNING: Impossibile verificare dati database (continuo comunque...)"
fi
echo ""
echo "🚀 Avvio training..."
echo ""
echo "======================================================================="

# train_hybrid.py is invoked by relative name, so it must be run from the
# python_ml directory. Stop explicitly if that directory cannot be entered.
cd "$PYTHON_ML_DIR" || exit 1

# Run the training as the condition of the `if`. The script enables `set -e`,
# so running it as a plain statement and testing `$?` afterwards would make
# the failure branch unreachable: errexit would terminate the script before
# the error banner is printed.
#
# NOTE(review): MAX_SAMPLES is parsed and displayed earlier in the script but
# is not forwarded here; confirm whether train_hybrid.py accepts such a limit.
# NOTE(review): the password is passed as a command-line argument and may be
# visible in the process list; consider an environment-based alternative if
# train_hybrid.py supports one.
if "$VENV_PYTHON" train_hybrid.py --train --source database \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-name "$DB_NAME" \
  --db-user "$DB_USER" \
  --db-password "$DB_PASSWORD" \
  --days "$DAYS"; then
  # Success: report where the models are and what to do next.
  echo ""
  echo "======================================================================="
  echo "✅ TRAINING COMPLETATO CON SUCCESSO!"
  echo "======================================================================="
  echo ""
  echo "📁 Modelli salvati in: $PYTHON_ML_DIR/models/"
  echo ""
  echo "🔄 PROSSIMI PASSI:"
  echo " 1. Restart ML backend: sudo systemctl restart ids-ml-backend"
  echo " 2. Verifica caricamento: sudo journalctl -u ids-ml-backend -f"
  echo " 3. Test API: curl http://localhost:8000/health"
  echo ""
else
  # Failure: print the error banner and exit with a non-zero status.
  echo ""
  echo "======================================================================="
  echo "❌ ERRORE DURANTE IL TRAINING"
  echo "======================================================================="
  echo ""
  echo "Controlla i log sopra per dettagli sull'errore."
  exit 1
fi