ids.alfacom.it/deployment/run_ml_training.sh
marco370 b88377e2d5 Adapt ML model to new database schema and automate training
Adjusts SQL queries and feature extraction to accommodate changes in the network_logs database schema, enabling automatic weekly retraining of the ML hybrid detector.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: intermediate_checkpoint
Replit-Commit-Event-Id: f4fdd53b-f433-44d9-9f0f-63616a9eeec1
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/2lUhxO2
2025-11-24 18:14:43 +00:00

93 lines
2.3 KiB
Bash

#!/bin/bash
#
# ML training wrapper - automatic execution via systemd.
# Credentials are loaded from the .env file.
#

# Stop at the first command that fails.
set -e

# Installation layout: everything hangs off IDS_ROOT.
IDS_ROOT="/opt/ids"
ENV_FILE="${IDS_ROOT}/.env"
PYTHON_ML_DIR="${IDS_ROOT}/python_ml"
VENV_PYTHON="${PYTHON_ML_DIR}/venv/bin/python"

# Log location: a dedicated file inside LOG_DIR.
LOG_DIR="/var/log/ids"
LOG_FILE="${LOG_DIR}/ml-training.log"

# Create the log directory if it does not exist yet.
mkdir -p "${LOG_DIR}"
# Logging helper.
#
# Prints one timestamped line on stdout and appends the same line to the
# file named by the global LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: the message; when several arguments are given they are joined
#            by the first character of IFS (a space by default)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in LOG_FILE
# Returns:   the exit status of tee (last stage of the pipeline)
log() {
  # "$*" keeps the whole message even if a caller passes several words
  # without quoting them; printf '%s' emits the text verbatim.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG_FILE"
}
# Logs a message framed above and below by a separator line.
# Arguments: $1 - the message to frame
banner() {
  log "========================================="
  log "$1"
  log "========================================="
}

banner "ML Training - Avvio automatico"

# The .env file must exist before anything else is attempted.
if [[ ! -f "$ENV_FILE" ]]; then
  log "ERROR: File .env non trovato: $ENV_FILE"
  exit 1
fi

# Load the environment variables. While `set -a` is active every variable
# assigned by the sourced file is exported automatically.
log "Caricamento credenziali database..."
set -a
source "$ENV_FILE"
set +a

# A database password is mandatory.
if [[ -z "$PGPASSWORD" ]]; then
  log "ERROR: PGPASSWORD non trovata in .env"
  exit 1
fi

# Connection settings, with fallbacks for anything .env did not provide.
DB_HOST="${PGHOST:-localhost}"
DB_PORT="${PGPORT:-5432}"
DB_NAME="${PGDATABASE:-ids}"
DB_USER="${PGUSER:-postgres}"
log "Database: ${DB_USER}@${DB_HOST}:${DB_PORT}/${DB_NAME}"

# The virtualenv interpreter must be present.
if [[ ! -f "$VENV_PYTHON" ]]; then
  log "ERROR: Venv Python non trovato: $VENV_PYTHON"
  exit 1
fi

# Training window in days: defaults to 7, overridable via ML_TRAINING_DAYS.
DAYS="${ML_TRAINING_DAYS:-7}"
log "Training ultimi $DAYS giorni di traffico..."

# NOTE(review): the password is handed to the trainer as a command-line
# argument, which is typically visible in the process list. Confirm whether
# train_hybrid.py can read PGPASSWORD from the environment instead.
train_args=(
  --train
  --source database
  --db-host "$DB_HOST"
  --db-port "$DB_PORT"
  --db-name "$DB_NAME"
  --db-user "$DB_USER"
  --db-password "$PGPASSWORD"
  --days "$DAYS"
)

# Run the training from the python_ml directory; stdout and stderr are shown
# and appended to the log file.
cd "$PYTHON_ML_DIR"
"$VENV_PYTHON" train_hybrid.py "${train_args[@]}" 2>&1 | tee -a "$LOG_FILE"

# Capture the trainer's own exit status (first pipeline stage, not tee's)
# right away, before any other command overwrites PIPESTATUS.
train_status=${PIPESTATUS[0]}

if [[ "$train_status" -ne 0 ]]; then
  banner "❌ ERRORE durante il training"
  log "Controlla log completo: $LOG_FILE"
  exit 1
fi

banner "✅ Training completato con successo!"
log "Modelli salvati in: $PYTHON_ML_DIR/models/"
log ""
log "Il ML backend caricherà automaticamente i nuovi modelli al prossimo riavvio."
log "Per applicare immediatamente: sudo systemctl restart ids-ml-backend"
exit 0