Add historical training data logging for hybrid models
Integrate saving of training history to the database within `train_hybrid.py`, ensuring that model versioning is correctly applied for hybrid detector runs.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 9f8d0aa1-70ec-4271-b143-5f66d1d3756b
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/RJGlbTt
This commit is contained in:
parent
2a33ac82fa
commit
a47079c97c
0
deployment/run_ml_training.sh
Normal file → Executable file
0
deployment/run_ml_training.sh
Normal file → Executable file
0
deployment/setup_ml_training_timer.sh
Normal file → Executable file
0
deployment/setup_ml_training_timer.sh
Normal file → Executable file
@ -60,6 +60,44 @@ def train_on_real_traffic(db_config: dict, days: int = 7) -> pd.DataFrame:
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def save_training_history(db_config: dict, result: dict):
    """
    Save training results to database training_history table.

    Inserts one row with the model version, the number of processed
    records, the number of selected features, a placeholder duration of 0,
    the status 'success' and a free-text note summarizing the run.

    This is best-effort logging: any database or data error is reported on
    stdout and swallowed so that a failure here never aborts training.

    Args:
        db_config: Keyword arguments forwarded unchanged to
            ``psycopg2.connect``.
        result: Training result mapping. The keys read here are
            ``records_processed``, ``features_selected``,
            ``anomalies_detected``, ``unique_ips`` and ``model_type``.
    """
    import psycopg2

    MODEL_VERSION = "2.0.0"  # Hybrid ML Detector version

    print("\n[TRAIN] Saving training history to database...")

    try:
        conn = psycopg2.connect(**db_config)
        try:
            # The cursor context manager closes the cursor even when the
            # INSERT (or building its parameters) raises.
            with conn.cursor() as cursor:
                cursor.execute("""
                    INSERT INTO training_history
                    (model_version, records_processed, features_count, training_duration, status, notes)
                    VALUES (%s, %s, %s, %s, %s, %s)
                """, (
                    MODEL_VERSION,
                    result['records_processed'],
                    result['features_selected'],  # Use selected features count
                    0,  # duration not implemented yet
                    'success',
                    f"Anomalie: {result['anomalies_detected']}/{result['unique_ips']} - {result['model_type']}"
                ))
            conn.commit()
        finally:
            # Always release the connection; previously it leaked whenever
            # execute/commit failed. Closing without commit discards the
            # pending transaction.
            conn.close()

        print(f"[TRAIN] ✅ Training history saved (version {MODEL_VERSION})")

    except Exception as e:
        print(f"[TRAIN] ⚠ Failed to save training history: {e}")
        # Don't fail the whole training if just logging fails
|
||||||
|
|
||||||
|
|
||||||
def train_unsupervised(args):
|
def train_unsupervised(args):
|
||||||
"""
|
"""
|
||||||
Train unsupervised model (no labels needed)
|
Train unsupervised model (no labels needed)
|
||||||
@ -71,6 +109,9 @@ def train_unsupervised(args):
|
|||||||
|
|
||||||
detector = MLHybridDetector(model_dir=args.model_dir)
|
detector = MLHybridDetector(model_dir=args.model_dir)
|
||||||
|
|
||||||
|
# Database config for later use
|
||||||
|
db_config = None
|
||||||
|
|
||||||
# Load data
|
# Load data
|
||||||
if args.source == 'synthetic':
|
if args.source == 'synthetic':
|
||||||
print("\n[TRAIN] Using synthetic dataset...")
|
print("\n[TRAIN] Using synthetic dataset...")
|
||||||
@ -109,6 +150,10 @@ def train_unsupervised(args):
|
|||||||
print(f" Model type: {result['model_type']}")
|
print(f" Model type: {result['model_type']}")
|
||||||
print("="*70)
|
print("="*70)
|
||||||
|
|
||||||
|
# Save training history to database (if using database source)
|
||||||
|
if db_config and args.source == 'database':
|
||||||
|
save_training_history(db_config, result)
|
||||||
|
|
||||||
print(f"\n✅ Training completed! Models saved to: {args.model_dir}")
|
print(f"\n✅ Training completed! Models saved to: {args.model_dir}")
|
||||||
print(f"\nNext steps:")
|
print(f"\nNext steps:")
|
||||||
print(f" 1. Test detection: python python_ml/test_detection.py")
|
print(f" 1. Test detection: python python_ml/test_detection.py")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user