Add script to train hybrid ML detector with real data
Create a bash script to automate the training of the hybrid ML detector, automatically fetching database credentials from the .env file and executing the training process. Replit-Commit-Author: Agent Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528 Replit-Commit-Checkpoint-Type: full_checkpoint Replit-Commit-Event-Id: a3c383a4-4a2c-4598-b060-f46984980561 Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/2lUhxO2
This commit is contained in:
parent
34bd6eb8b8
commit
04136e4303
125
deployment/train_hybrid_production.sh
Normal file
125
deployment/train_hybrid_production.sh
Normal file
@ -0,0 +1,125 @@
|
||||
#!/bin/bash
#
# Train the hybrid ML detector on real data.
# Database credentials are read automatically from /opt/ids/.env.
#

set -o errexit  # stop at the first command that fails
|
||||
|
||||
# Opening banner, followed by one blank line.
printf '%s\n' \
  "=======================================================================" \
  " TRAINING HYBRID ML DETECTOR - DATI REALI" \
  "=======================================================================" \
  ""
|
||||
|
||||
# Filesystem layout: everything is derived from the installation root.
IDS_ROOT='/opt/ids'
ENV_FILE="${IDS_ROOT}/.env"
PYTHON_ML_DIR="${IDS_ROOT}/python_ml"
VENV_PYTHON="${PYTHON_ML_DIR}/venv/bin/python"
|
||||
|
||||
# Stop right away when the .env file is not a regular file.
[[ -f "$ENV_FILE" ]] || {
  echo "❌ ERRORE: File .env non trovato in $ENV_FILE"
  exit 1
}

# Bring the variables defined in .env into this shell.
echo "📂 Caricamento credenziali database da .env..."
. "$ENV_FILE"
|
||||
|
||||
# Map the PG* variables onto the DB_* names used by the rest of the script,
# substituting a default whenever one of them is unset or empty.
DB_HOST=${PGHOST:-localhost}
DB_PORT=${PGPORT:-5432}
DB_NAME=${PGDATABASE:-ids}
DB_USER=${PGUSER:-postgres}
DB_PASSWORD=$PGPASSWORD  # no default: stays empty when PGPASSWORD is not set
|
||||
|
||||
# A password is mandatory: give up with a hint when none was loaded.
[[ -n "$DB_PASSWORD" ]] || {
  printf '%s\n' \
    "❌ ERRORE: PGPASSWORD non trovata nel file .env" \
    " Aggiungi: PGPASSWORD=tua_password_qui"
  exit 1
}
|
||||
|
||||
# Show the connection settings in use; the password is masked, never printed.
printf '%s\n' \
  "✅ Credenziali caricate:" \
  " Host: $DB_HOST" \
  " Port: $DB_PORT" \
  " Database: $DB_NAME" \
  " User: $DB_USER" \
  " Password: ****** (nascosta)" \
  ""
|
||||
|
||||
# Training parameters, taken from the positional arguments:
#   $1 - number of days to look back (7 when omitted or empty)
#   $2 - maximum number of records (1000000 when omitted or empty)
DAYS=${1:-7}
MAX_SAMPLES=${2:-1000000}

printf '%s\n' \
  "🎯 Parametri training:" \
  " Periodo: ultimi $DAYS giorni" \
  " Max records: $MAX_SAMPLES" \
  ""
|
||||
|
||||
# The interpreter of the virtual environment must exist before going on.
[[ -f "$VENV_PYTHON" ]] || {
  printf '%s\n' \
    "❌ ERRORE: Virtual environment non trovato in $VENV_PYTHON" \
    " Esegui prima: cd $IDS_ROOT && python3 -m venv python_ml/venv"
  exit 1
}

printf '%s\n' "🐍 Python: $VENV_PYTHON" ""
|
||||
|
||||
# Best-effort overview of the data available for training: first and last
# log timestamp, the whole days between them and the total row count of
# network_logs. The query is informational only.
#
# psql is used as the condition of the `if` so that a failure (psql missing,
# database unreachable, query error) only produces the warning below. The
# script runs with errexit enabled, so executing psql as a plain statement and
# inspecting $? afterwards would abort the script before the warning could be
# printed.
# stderr is discarded on purpose: the warning replaces psql's own message.
echo "📊 Verifica dati disponibili nel database..."
if ! PGPASSWORD="$DB_PASSWORD" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "
SELECT
TO_CHAR(MIN(timestamp), 'YYYY-MM-DD HH24:MI:SS') as primo_log,
TO_CHAR(MAX(timestamp), 'YYYY-MM-DD HH24:MI:SS') as ultimo_log,
EXTRACT(DAY FROM (MAX(timestamp) - MIN(timestamp))) || ' giorni' as periodo_totale,
TO_CHAR(COUNT(*), 'FM999,999,999') as totale_records
FROM network_logs;
" 2>/dev/null; then
  echo "⚠️ WARNING: Impossibile verificare dati database (continuo comunque...)"
fi
|
||||
|
||||
# Announce the start of the training run.
printf '%s\n' \
  "" \
  "🚀 Avvio training..." \
  "" \
  "======================================================================="
|
||||
|
||||
# Work from the ML directory: train_hybrid.py is invoked by relative path.
cd "$PYTHON_ML_DIR"

# Run the training and report the outcome.
#
# The training command is the condition of the `if`: the script runs with
# errexit enabled, so running it as a plain statement and testing $? afterwards
# would abort the script on failure before the error banner could be printed.
#
# NOTE(review): the database password is passed as a command-line argument,
# which is generally visible in process listings; consider passing it through
# the environment if train_hybrid.py supports that -- to be confirmed.
# NOTE(review): MAX_SAMPLES is read and displayed earlier in this script but
# is not forwarded here; confirm whether train_hybrid.py has an option for it.
if "$VENV_PYTHON" train_hybrid.py --train --source database \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-name "$DB_NAME" \
  --db-user "$DB_USER" \
  --db-password "$DB_PASSWORD" \
  --days "$DAYS"; then
  echo ""
  echo "======================================================================="
  echo "✅ TRAINING COMPLETATO CON SUCCESSO!"
  echo "======================================================================="
  echo ""
  echo "📁 Modelli salvati in: $PYTHON_ML_DIR/models/"
  echo ""
  echo "🔄 PROSSIMI PASSI:"
  echo " 1. Restart ML backend: sudo systemctl restart ids-ml-backend"
  echo " 2. Verifica caricamento: sudo journalctl -u ids-ml-backend -f"
  echo " 3. Test API: curl http://localhost:8000/health"
  echo ""
else
  echo ""
  echo "======================================================================="
  echo "❌ ERRORE DURANTE IL TRAINING"
  echo "======================================================================="
  echo ""
  echo "Controlla i log sopra per dettagli sull'errore."
  exit 1
fi
|
||||
Loading…
Reference in New Issue
Block a user