Update model comparison script to use current database detections

Adjusted the script to query the detections already in the database instead of filtering on a specific model version (the model_version column no longer exists), and updated column names to match the actual database schema (source_ip, risk_score, anomaly_type, log_count, last_seen, detected_at).
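For reference, the field substitutions applied throughout the script, collected as a minimal mapping (reconstructed from the diff below; COLUMN_MAP is an illustrative name, not part of the code):

    # Field the script used to read  ->  column actually present in the schema
    COLUMN_MAP = {
        'ip_address': 'source_ip',
        'anomaly_score': 'risk_score',
        'risk_level': 'anomaly_type',    # the report now shows anomaly type instead of a risk level
        'detection_count': 'log_count',
        'last_detected': 'last_seen',
        'model_version': 'detected_at',  # model_version was dropped; detected_at is selected instead
    }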

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 62d703c2-4658-4280-aec5-f5e7c090b266
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/RJGlbTt
marco370 2025-11-25 08:42:06 +00:00
parent c56af1cb16
commit cf3223b247
3 changed files with 41 additions and 353 deletions

View File

@@ -18,6 +18,10 @@ externalPort = 80
localPort = 41303
externalPort = 3002
[[ports]]
localPort = 41797
externalPort = 3001
[[ports]]
localPort = 43471
externalPort = 3003

View File

@@ -1,316 +0,0 @@
--
-- PostgreSQL database dump
--
\restrict 2TIWDQhMSrkSVUlE3vwc8arABguQGV9g2yPQXKdAAlO8nCVYt9WgzvyDfS5usR6
-- Dumped from database version 16.9 (415ebe8)
-- Dumped by pg_dump version 16.10
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
SET default_tablespace = '';
SET default_table_access_method = heap;
--
-- Name: detections; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.detections (
id character varying DEFAULT gen_random_uuid() NOT NULL,
source_ip text NOT NULL,
risk_score numeric(5,2) NOT NULL,
confidence numeric(5,2) NOT NULL,
anomaly_type text NOT NULL,
reason text,
log_count integer NOT NULL,
first_seen timestamp without time zone NOT NULL,
last_seen timestamp without time zone NOT NULL,
blocked boolean DEFAULT false NOT NULL,
blocked_at timestamp without time zone,
detected_at timestamp without time zone DEFAULT now() NOT NULL,
country text,
country_code text,
city text,
organization text,
as_number text,
as_name text,
isp text
);
--
-- Name: network_analytics; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.network_analytics (
id character varying DEFAULT gen_random_uuid() NOT NULL,
date timestamp without time zone NOT NULL,
hour integer,
total_packets integer DEFAULT 0 NOT NULL,
total_bytes bigint DEFAULT 0 NOT NULL,
unique_ips integer DEFAULT 0 NOT NULL,
normal_packets integer DEFAULT 0 NOT NULL,
normal_bytes bigint DEFAULT 0 NOT NULL,
normal_unique_ips integer DEFAULT 0 NOT NULL,
top_normal_ips text,
attack_packets integer DEFAULT 0 NOT NULL,
attack_bytes bigint DEFAULT 0 NOT NULL,
attack_unique_ips integer DEFAULT 0 NOT NULL,
attacks_by_country text,
attacks_by_type text,
top_attackers text,
traffic_by_country text,
created_at timestamp without time zone DEFAULT now() NOT NULL
);
--
-- Name: network_logs; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.network_logs (
id character varying DEFAULT gen_random_uuid() NOT NULL,
router_id character varying NOT NULL,
"timestamp" timestamp without time zone NOT NULL,
source_ip text NOT NULL,
destination_ip text,
source_port integer,
destination_port integer,
protocol text,
action text,
bytes integer,
packets integer,
logged_at timestamp without time zone DEFAULT now() NOT NULL,
router_name text DEFAULT 'unknown'::text NOT NULL
);
--
-- Name: routers; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.routers (
id character varying DEFAULT gen_random_uuid() NOT NULL,
name text NOT NULL,
ip_address text NOT NULL,
api_port integer DEFAULT 8728 NOT NULL,
username text NOT NULL,
password text NOT NULL,
enabled boolean DEFAULT true NOT NULL,
last_sync timestamp without time zone,
created_at timestamp without time zone DEFAULT now() NOT NULL
);
--
-- Name: schema_version; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.schema_version (
id integer DEFAULT 1 NOT NULL,
version integer DEFAULT 0 NOT NULL,
applied_at timestamp without time zone DEFAULT now() NOT NULL,
description text
);
--
-- Name: training_history; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.training_history (
id character varying DEFAULT gen_random_uuid() NOT NULL,
model_version text NOT NULL,
records_processed integer NOT NULL,
features_count integer NOT NULL,
accuracy numeric(5,2),
training_duration integer,
status text NOT NULL,
notes text,
trained_at timestamp without time zone DEFAULT now() NOT NULL
);
--
-- Name: whitelist; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.whitelist (
id character varying DEFAULT gen_random_uuid() NOT NULL,
ip_address text NOT NULL,
comment text,
reason text,
created_by text,
active boolean DEFAULT true NOT NULL,
created_at timestamp without time zone DEFAULT now() NOT NULL
);
--
-- Name: detections detections_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.detections
ADD CONSTRAINT detections_pkey PRIMARY KEY (id);
--
-- Name: network_analytics network_analytics_date_hour_key; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.network_analytics
ADD CONSTRAINT network_analytics_date_hour_key UNIQUE (date, hour);
--
-- Name: network_analytics network_analytics_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.network_analytics
ADD CONSTRAINT network_analytics_pkey PRIMARY KEY (id);
--
-- Name: network_logs network_logs_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.network_logs
ADD CONSTRAINT network_logs_pkey PRIMARY KEY (id);
--
-- Name: routers routers_ip_address_unique; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.routers
ADD CONSTRAINT routers_ip_address_unique UNIQUE (ip_address);
--
-- Name: routers routers_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.routers
ADD CONSTRAINT routers_pkey PRIMARY KEY (id);
--
-- Name: schema_version schema_version_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.schema_version
ADD CONSTRAINT schema_version_pkey PRIMARY KEY (id);
--
-- Name: training_history training_history_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.training_history
ADD CONSTRAINT training_history_pkey PRIMARY KEY (id);
--
-- Name: whitelist whitelist_ip_address_unique; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.whitelist
ADD CONSTRAINT whitelist_ip_address_unique UNIQUE (ip_address);
--
-- Name: whitelist whitelist_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.whitelist
ADD CONSTRAINT whitelist_pkey PRIMARY KEY (id);
--
-- Name: country_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX country_idx ON public.detections USING btree (country);
--
-- Name: detected_at_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX detected_at_idx ON public.detections USING btree (detected_at);
--
-- Name: detection_source_ip_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX detection_source_ip_idx ON public.detections USING btree (source_ip);
--
-- Name: network_analytics_date_hour_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX network_analytics_date_hour_idx ON public.network_analytics USING btree (date, hour);
--
-- Name: network_analytics_date_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX network_analytics_date_idx ON public.network_analytics USING btree (date);
--
-- Name: risk_score_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX risk_score_idx ON public.detections USING btree (risk_score);
--
-- Name: router_id_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX router_id_idx ON public.network_logs USING btree (router_id);
--
-- Name: source_ip_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX source_ip_idx ON public.network_logs USING btree (source_ip);
--
-- Name: timestamp_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX timestamp_idx ON public.network_logs USING btree ("timestamp");
--
-- Name: network_logs network_logs_router_id_routers_id_fk; Type: FK CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.network_logs
ADD CONSTRAINT network_logs_router_id_routers_id_fk FOREIGN KEY (router_id) REFERENCES public.routers(id);
--
-- PostgreSQL database dump complete
--
\unrestrict 2TIWDQhMSrkSVUlE3vwc8arABguQGV9g2yPQXKdAAlO8nCVYt9WgzvyDfS5usR6
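The dump above defines the detections table that the comparison script now reads. As a cross-check, a minimal sketch of the updated query against that schema, assuming psycopg2 with RealDictCursor (the driver and cursor factory the script already uses); the connection parameters are placeholders:

    import psycopg2
    from psycopg2.extras import RealDictCursor

    # Placeholder DSN; the script obtains its connection via get_db_connection().
    conn = psycopg2.connect("dbname=ids user=ids")
    cursor = conn.cursor(cursor_factory=RealDictCursor)
    cursor.execute(
        """
        SELECT d.id, d.source_ip, d.risk_score, d.anomaly_type,
               d.log_count, d.last_seen, d.blocked, d.detected_at
        FROM detections d
        ORDER BY d.risk_score DESC
        LIMIT %s
        """,
        (50,),
    )
    detections = cursor.fetchall()  # rows come back as dicts keyed by column name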

View File

@@ -29,9 +29,10 @@ def get_db_connection():
 def load_old_detections(limit=100):
     """
-    Load the detections from the old model (model_version 1.0.0)
+    Load the existing detections from the database
+    (we do not filter on model_version because the column does not exist)
     """
-    print("\n[1] Loading old-model detections (v1.0.0)...")
+    print("\n[1] Loading existing detections from the database...")
     conn = get_db_connection()
     cursor = conn.cursor(cursor_factory=RealDictCursor)
@@ -39,16 +40,15 @@ def load_old_detections(limit=100):
     query = """
         SELECT
             d.id,
-            d.ip_address,
-            d.anomaly_score,
-            d.risk_level,
-            d.detection_count,
-            d.last_detected,
+            d.source_ip,
+            d.risk_score,
+            d.anomaly_type,
+            d.log_count,
+            d.last_seen,
             d.blocked,
-            d.model_version
+            d.detected_at
         FROM detections d
-        WHERE d.model_version = '1.0.0'
-        ORDER BY d.anomaly_score DESC
+        ORDER BY d.risk_score DESC
         LIMIT %s
     """
@@ -57,7 +57,7 @@ def load_old_detections(limit=100):
     cursor.close()
     conn.close()
-    print(f"    Found {len(detections)} detections from model v1.0.0")
+    print(f"    Found {len(detections)} detections in the database")
     return detections
@@ -127,20 +127,20 @@ def reanalyze_with_hybrid(detector, ip_address, old_detection):
         'ip_address': ip_address,
         'logs_count': len(logs),
-        # Old model (v1.0.0)
-        'old_score': old_detection['anomaly_score'],
-        'old_risk_level': old_detection['risk_level'],
+        # Current detection in the DB
+        'old_score': float(old_detection['risk_score']),
+        'old_anomaly_type': old_detection['anomaly_type'],
         'old_blocked': old_detection['blocked'],
-        # New model (v2.0.0)
+        # New Hybrid model (re-analysis)
         'new_score': new_detection.get('anomaly_score', 0),
-        'new_risk_level': new_detection.get('risk_level', 'unknown'),
+        'new_anomaly_type': new_detection.get('anomaly_type', 'unknown'),
         'new_confidence': new_detection.get('confidence', 'unknown'),
         'new_is_anomaly': new_detection.get('is_anomaly', False),
         # Delta
-        'score_delta': new_detection.get('anomaly_score', 0) - old_detection['anomaly_score'],
-        'risk_changed': old_detection['risk_level'] != new_detection.get('risk_level', 'unknown'),
+        'score_delta': new_detection.get('anomaly_score', 0) - float(old_detection['risk_score']),
+        'type_changed': old_detection['anomaly_type'] != new_detection.get('anomaly_type', 'unknown'),
     }
     return comparison
@@ -148,14 +148,14 @@ def reanalyze_with_hybrid(detector, ip_address, old_detection):
 def main():
     print("\n" + "="*80)
-    print("  IDS MODEL COMPARISON - v1.0.0 vs v2.0.0")
+    print("  IDS MODEL COMPARISON - DB Current vs Hybrid Detector v2.0.0")
     print("="*80)
-    # Load the old detections
+    # Load the existing detections
     old_detections = load_old_detections(limit=50)
     if not old_detections:
-        print("\n❌ No detections from the old model found!")
+        print("\n❌ No detections found in the database!")
         return
     # Load the new Hybrid model
@@ -170,23 +170,23 @@ def main():
     print(f"    ✅ Hybrid Detector loaded (18 selected features)")
     # Re-analyze each IP with the new model
-    print(f"\n[3] Re-analyzing {len(old_detections)} IPs with the new model...")
+    print(f"\n[3] Re-analyzing {len(old_detections)} IPs with the new Hybrid model...")
     print("    (This may take a few minutes...)")
     comparisons = []
     for i, old_det in enumerate(old_detections):
-        ip = old_det['ip_address']
+        ip = old_det['source_ip']
         print(f"\n  [{i+1}/{len(old_detections)}] Analyzing IP: {ip}")
-        print(f"    Old: score={old_det['anomaly_score']:.1f}, risk={old_det['risk_level']}, blocked={old_det['blocked']}")
+        print(f"    Current: score={float(old_det['risk_score']):.1f}, type={old_det['anomaly_type']}, blocked={old_det['blocked']}")
         comparison = reanalyze_with_hybrid(detector, ip, old_det)
         if comparison:
             comparisons.append(comparison)
-            print(f"    New: score={comparison['new_score']:.1f}, risk={comparison['new_risk_level']}, confidence={comparison['new_confidence']}")
-            print(f"    Δ: {comparison['score_delta']:+.1f} score")
+            print(f"    Hybrid: score={comparison['new_score']:.1f}, type={comparison['new_anomaly_type']}, confidence={comparison['new_confidence']}")
+            print(f"    Δ: {comparison['score_delta']:+.1f} score")
         else:
             print(f"    ⚠ No recent logs found for this IP")
@@ -204,11 +204,11 @@ def main():
     # Statistics
     print(f"\nIPs re-analyzed: {len(comparisons)}/{len(old_detections)}")
    print(f"\nAverage score:")
-    print(f"  Old model:     {df_comp['old_score'].mean():.1f}")
-    print(f"  New model:     {df_comp['new_score'].mean():.1f}")
-    print(f"  Average delta: {df_comp['score_delta'].mean():+.1f}")
+    print(f"  Current detections: {df_comp['old_score'].mean():.1f}")
+    print(f"  Hybrid Detector:    {df_comp['new_score'].mean():.1f}")
+    print(f"  Average delta:      {df_comp['score_delta'].mean():+.1f}")
-    # False positives (the old model flagged them, the new one does not)
+    # False positives (high score in the DB, Hybrid says normal)
     false_positives = df_comp[
         (df_comp['old_score'] >= 80) &
         (~df_comp['new_is_anomaly'])
@@ -216,9 +216,9 @@ def main():
     print(f"\n🎯 Possible false positives reduced: {len(false_positives)}")
     if len(false_positives) > 0:
-        print("\n  IPs the old model blocked but the new model considers normal:")
+        print("\n  IPs with a high score in the DB but considered normal by the Hybrid Detector:")
         for _, row in false_positives.iterrows():
-            print(f"    {row['ip_address']} (old={row['old_score']:.0f}, new={row['new_score']:.0f})")
+            print(f"    {row['ip_address']} (DB={row['old_score']:.0f}, Hybrid={row['new_score']:.0f})")
     # Confirmed true positives
     true_positives = df_comp[
@@ -226,18 +226,18 @@ def main():
         (df_comp['new_is_anomaly'])
     ]
-    print(f"\n✅ Anomalies confirmed by both models: {len(true_positives)}")
+    print(f"\n✅ Anomalies confirmed by the Hybrid Detector: {len(true_positives)}")
     # Confidence breakdown (new model only)
     if 'new_confidence' in df_comp.columns:
-        print(f"\n📊 Confidence level distribution (new model):")
+        print(f"\n📊 Confidence level distribution (Hybrid Detector):")
         conf_counts = df_comp['new_confidence'].value_counts()
         for conf, count in conf_counts.items():
             print(f"    {conf}: {count} IP")
-    # Risk-level changes
-    risk_changes = df_comp[df_comp['risk_changed']]
-    print(f"\n🔄 IPs with a risk-level change: {len(risk_changes)}")
+    # Anomaly-type changes
+    type_changes = df_comp[df_comp['type_changed']]
+    print(f"\n🔄 IPs with an anomaly-type change: {len(type_changes)}")
     # Top 10 biggest score reductions
     print(f"\n📉 Top 10 score reductions (possible corrected FPs):")