Adjusts SQL queries and feature extraction to accommodate changes in the network_logs database schema, enabling automatic weekly retraining of the ML hybrid detector. Replit-Commit-Author: Agent Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528 Replit-Commit-Checkpoint-Type: intermediate_checkpoint Replit-Commit-Event-Id: f4fdd53b-f433-44d9-9f0f-63616a9eeec1 Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/2lUhxO2
93 lines
2.3 KiB
Bash
93 lines
2.3 KiB
Bash
#!/bin/bash
#
# ML Training Wrapper - automatic execution via systemd.
# Loads the database credentials from the .env file.
#

# Exit as soon as any unguarded command fails.
# NOTE: do not enable `pipefail` here without revisiting the PIPESTATUS check
# that follows the training pipeline further down: combined with -e, the
# script would exit before the training failure is logged.
set -e

# Installation layout.
IDS_ROOT="/opt/ids"
ENV_FILE="$IDS_ROOT/.env"
PYTHON_ML_DIR="$IDS_ROOT/python_ml"
VENV_PYTHON="$PYTHON_ML_DIR/venv/bin/python"
LOG_DIR="/var/log/ids"

# Create the log directory if it does not exist yet.
mkdir -p "$LOG_DIR"

# Dedicated log file for the training runs.
LOG_FILE="$LOG_DIR/ml-training.log"

#######################################
# Logging helper.
# Prefixes the message with a "[YYYY-MM-DD HH:MM:SS]" timestamp, prints the
# line to stdout and appends the same line to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text (an omitted argument logs an empty message)
# Outputs:   the timestamped line on stdout
# Returns:   exit status of tee
#######################################
log() {
  # printf keeps the message literal regardless of echo's option handling.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}" | tee -a "$LOG_FILE"
}

log "========================================="
log "ML Training - Avvio automatico"
log "========================================="

# Make sure the .env file exists before sourcing it.
if [[ ! -f "$ENV_FILE" ]]; then
  log "ERROR: File .env non trovato: $ENV_FILE"
  exit 1
fi

# Load the environment variables. `set -a` marks every variable assigned
# while the file is sourced for export, so child processes inherit them.
log "Caricamento credenziali database..."
set -a
# shellcheck source=/dev/null
source "$ENV_FILE"
set +a

# The database password is mandatory.
if [[ -z "${PGPASSWORD:-}" ]]; then
  log "ERROR: PGPASSWORD non trovata in .env"
  exit 1
fi

# Connection settings, falling back to defaults for anything not provided.
DB_HOST="${PGHOST:-localhost}"
DB_PORT="${PGPORT:-5432}"
DB_NAME="${PGDATABASE:-ids}"
DB_USER="${PGUSER:-postgres}"

log "Database: $DB_USER@$DB_HOST:$DB_PORT/$DB_NAME"

# Make sure the virtualenv interpreter is present.
if [[ ! -f "$VENV_PYTHON" ]]; then
  log "ERROR: Venv Python non trovato: $VENV_PYTHON"
  exit 1
fi

# Training parameters: 7 days by default, configurable via ML_TRAINING_DAYS.
DAYS="${ML_TRAINING_DAYS:-7}"

log "Training ultimi $DAYS giorni di traffico..."

# train_hybrid.py is invoked by relative path, so run from the ML directory.
# Log the failure explicitly instead of letting `set -e` abort silently.
if ! cd "$PYTHON_ML_DIR"; then
  log "ERROR: Impossibile accedere alla directory: $PYTHON_ML_DIR"
  exit 1
fi

# Run the training; its combined stdout/stderr is shown and appended to the log.
# NOTE(review): --db-password puts the secret on the command line, where it is
# visible in the process list. PGPASSWORD is already exported to the child
# process; consider having train_hybrid.py read it from the environment.
# Left unchanged here because the script's CLI is not visible from this file.
"$VENV_PYTHON" train_hybrid.py --train --source database \
  --db-host "$DB_HOST" \
  --db-port "$DB_PORT" \
  --db-name "$DB_NAME" \
  --db-user "$DB_USER" \
  --db-password "$PGPASSWORD" \
  --days "$DAYS" 2>&1 | tee -a "$LOG_FILE"
# Capture the trainer's own exit status right away: the pipeline's status is
# tee's, and PIPESTATUS is overwritten by the next command that runs.
train_status=${PIPESTATUS[0]}

# Report the outcome.
if [[ "$train_status" -eq 0 ]]; then
  log "========================================="
  log "✅ Training completato con successo!"
  log "========================================="
  log "Modelli salvati in: $PYTHON_ML_DIR/models/"
  log ""
  log "Il ML backend caricherà automaticamente i nuovi modelli al prossimo riavvio."
  log "Per applicare immediatamente: sudo systemctl restart ids-ml-backend"
  exit 0
else
  log "========================================="
  log "❌ ERRORE durante il training"
  log "========================================="
  log "Controlla log completo: $LOG_FILE"
  exit 1
fi