From cf3223b2473db50dc7b65589ce173d23a9873857 Mon Sep 17 00:00:00 2001
From: marco370 <48531002-marco370@users.noreply.replit.com>
Date: Tue, 25 Nov 2025 08:42:06 +0000
Subject: [PATCH] Update model comparison script to use current database
 detections

Adjusted script to query existing database detections instead of a specific
model version, updating column names to match the actual database schema
(source_ip, risk_score, anomaly_type, log_count, last_seen, detected_at).

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 62d703c2-4658-4280-aec5-f5e7c090b266
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/RJGlbTt
---
 .replit                     |   4 +
 database-schema/schema.sql  | 316 ------------------------------------
 python_ml/compare_models.py |  74 ++++-----
 3 files changed, 41 insertions(+), 353 deletions(-)

diff --git a/.replit b/.replit
index 3dc4618..8eace38 100644
--- a/.replit
+++ b/.replit
@@ -18,6 +18,10 @@ externalPort = 80
 localPort = 41303
 externalPort = 3002
 
+[[ports]]
+localPort = 41797
+externalPort = 3001
+
 [[ports]]
 localPort = 43471
 externalPort = 3003
diff --git a/database-schema/schema.sql b/database-schema/schema.sql
index 03c6084..e69de29 100644
--- a/database-schema/schema.sql
+++ b/database-schema/schema.sql
@@ -1,316 +0,0 @@
---
--- PostgreSQL database dump
---
-
-\restrict 2TIWDQhMSrkSVUlE3vwc8arABguQGV9g2yPQXKdAAlO8nCVYt9WgzvyDfS5usR6
-
--- Dumped from database version 16.9 (415ebe8)
--- Dumped by pg_dump version 16.10
-
-SET statement_timeout = 0;
-SET lock_timeout = 0;
-SET idle_in_transaction_session_timeout = 0;
-SET client_encoding = 'UTF8';
-SET standard_conforming_strings = on;
-SELECT pg_catalog.set_config('search_path', '', false);
-SET check_function_bodies = false;
-SET xmloption = content;
-SET client_min_messages = warning;
-SET row_security = off;
-
-SET default_tablespace = '';
-
-SET default_table_access_method = heap;
-
---
--- Name: detections; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.detections (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    source_ip text NOT NULL,
-    risk_score numeric(5,2) NOT NULL,
-    confidence numeric(5,2) NOT NULL,
-    anomaly_type text NOT NULL,
-    reason text,
-    log_count integer NOT NULL,
-    first_seen timestamp without time zone NOT NULL,
-    last_seen timestamp without time zone NOT NULL,
-    blocked boolean DEFAULT false NOT NULL,
-    blocked_at timestamp without time zone,
-    detected_at timestamp without time zone DEFAULT now() NOT NULL,
-    country text,
-    country_code text,
-    city text,
-    organization text,
-    as_number text,
-    as_name text,
-    isp text
-);
-
-
---
--- Name: network_analytics; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.network_analytics (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    date timestamp without time zone NOT NULL,
-    hour integer,
-    total_packets integer DEFAULT 0 NOT NULL,
-    total_bytes bigint DEFAULT 0 NOT NULL,
-    unique_ips integer DEFAULT 0 NOT NULL,
-    normal_packets integer DEFAULT 0 NOT NULL,
-    normal_bytes bigint DEFAULT 0 NOT NULL,
-    normal_unique_ips integer DEFAULT 0 NOT NULL,
-    top_normal_ips text,
-    attack_packets integer DEFAULT 0 NOT NULL,
-    attack_bytes bigint DEFAULT 0 NOT NULL,
-    attack_unique_ips integer DEFAULT 0 NOT NULL,
-    attacks_by_country text,
-    attacks_by_type text,
-    top_attackers text,
-    traffic_by_country text,
-    created_at timestamp without time zone DEFAULT now() NOT NULL
-);
-
-
---
--- Name: network_logs; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.network_logs (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    router_id character varying NOT NULL,
-    "timestamp" timestamp without time zone NOT NULL,
-    source_ip text NOT NULL,
-    destination_ip text,
-    source_port integer,
-    destination_port integer,
-    protocol text,
-    action text,
-    bytes integer,
-    packets integer,
-    logged_at timestamp without time zone DEFAULT now() NOT NULL,
-    router_name text DEFAULT 'unknown'::text NOT NULL
-);
-
-
---
--- Name: routers; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.routers (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    name text NOT NULL,
-    ip_address text NOT NULL,
-    api_port integer DEFAULT 8728 NOT NULL,
-    username text NOT NULL,
-    password text NOT NULL,
-    enabled boolean DEFAULT true NOT NULL,
-    last_sync timestamp without time zone,
-    created_at timestamp without time zone DEFAULT now() NOT NULL
-);
-
-
---
--- Name: schema_version; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.schema_version (
-    id integer DEFAULT 1 NOT NULL,
-    version integer DEFAULT 0 NOT NULL,
-    applied_at timestamp without time zone DEFAULT now() NOT NULL,
-    description text
-);
-
-
---
--- Name: training_history; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.training_history (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    model_version text NOT NULL,
-    records_processed integer NOT NULL,
-    features_count integer NOT NULL,
-    accuracy numeric(5,2),
-    training_duration integer,
-    status text NOT NULL,
-    notes text,
-    trained_at timestamp without time zone DEFAULT now() NOT NULL
-);
-
-
---
--- Name: whitelist; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.whitelist (
-    id character varying DEFAULT gen_random_uuid() NOT NULL,
-    ip_address text NOT NULL,
-    comment text,
-    reason text,
-    created_by text,
-    active boolean DEFAULT true NOT NULL,
-    created_at timestamp without time zone DEFAULT now() NOT NULL
-);
-
-
---
--- Name: detections detections_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.detections
-    ADD CONSTRAINT detections_pkey PRIMARY KEY (id);
-
-
---
--- Name: network_analytics network_analytics_date_hour_key; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.network_analytics
-    ADD CONSTRAINT network_analytics_date_hour_key UNIQUE (date, hour);
-
-
---
--- Name: network_analytics network_analytics_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.network_analytics
-    ADD CONSTRAINT network_analytics_pkey PRIMARY KEY (id);
-
-
---
--- Name: network_logs network_logs_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.network_logs
-    ADD CONSTRAINT network_logs_pkey PRIMARY KEY (id);
-
-
---
--- Name: routers routers_ip_address_unique; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.routers
-    ADD CONSTRAINT routers_ip_address_unique UNIQUE (ip_address);
-
-
---
--- Name: routers routers_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.routers
-    ADD CONSTRAINT routers_pkey PRIMARY KEY (id);
-
-
---
--- Name: schema_version schema_version_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.schema_version
-    ADD CONSTRAINT schema_version_pkey PRIMARY KEY (id);
-
-
---
--- Name: training_history training_history_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.training_history
-    ADD CONSTRAINT training_history_pkey PRIMARY KEY (id);
-
-
---
--- Name: whitelist whitelist_ip_address_unique; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.whitelist
-    ADD CONSTRAINT whitelist_ip_address_unique UNIQUE (ip_address);
-
-
---
--- Name: whitelist whitelist_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.whitelist
-    ADD CONSTRAINT whitelist_pkey PRIMARY KEY (id);
-
-
---
--- Name: country_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX country_idx ON public.detections USING btree (country);
-
-
---
--- Name: detected_at_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX detected_at_idx ON public.detections USING btree (detected_at);
-
-
---
--- Name: detection_source_ip_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX detection_source_ip_idx ON public.detections USING btree (source_ip);
-
-
---
--- Name: network_analytics_date_hour_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX network_analytics_date_hour_idx ON public.network_analytics USING btree (date, hour);
-
-
---
--- Name: network_analytics_date_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX network_analytics_date_idx ON public.network_analytics USING btree (date);
-
-
---
--- Name: risk_score_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX risk_score_idx ON public.detections USING btree (risk_score);
-
-
---
--- Name: router_id_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX router_id_idx ON public.network_logs USING btree (router_id);
-
-
---
--- Name: source_ip_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX source_ip_idx ON public.network_logs USING btree (source_ip);
-
-
---
--- Name: timestamp_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX timestamp_idx ON public.network_logs USING btree ("timestamp");
-
-
---
--- Name: network_logs network_logs_router_id_routers_id_fk; Type: FK CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.network_logs
-    ADD CONSTRAINT network_logs_router_id_routers_id_fk FOREIGN KEY (router_id) REFERENCES public.routers(id);
-
-
---
--- PostgreSQL database dump complete
---
-
-\unrestrict 2TIWDQhMSrkSVUlE3vwc8arABguQGV9g2yPQXKdAAlO8nCVYt9WgzvyDfS5usR6
-
diff --git a/python_ml/compare_models.py b/python_ml/compare_models.py
index 2f3c7eb..de38cb8 100644
--- a/python_ml/compare_models.py
+++ b/python_ml/compare_models.py
@@ -29,9 +29,10 @@ def get_db_connection():
 
 def load_old_detections(limit=100):
     """
-    Load the detections from the old model (model_version 1.0.0)
+    Load the existing detections from the database
+    (no model_version filter, because that column does not exist)
     """
-    print("\n[1] Loading old-model detections (v1.0.0)...")
+    print("\n[1] Loading existing detections from the database...")
 
     conn = get_db_connection()
     cursor = conn.cursor(cursor_factory=RealDictCursor)
@@ -39,16 +40,15 @@ def load_old_detections(limit=100):
     query = """
         SELECT
             d.id,
-            d.ip_address,
-            d.anomaly_score,
-            d.risk_level,
-            d.detection_count,
-            d.last_detected,
+            d.source_ip,
+            d.risk_score,
+            d.anomaly_type,
+            d.log_count,
+            d.last_seen,
             d.blocked,
-            d.model_version
+            d.detected_at
         FROM detections d
-        WHERE d.model_version = '1.0.0'
-        ORDER BY d.anomaly_score DESC
+        ORDER BY d.risk_score DESC
         LIMIT %s
     """
 
@@ -57,7 +57,7 @@ def load_old_detections(limit=100):
     cursor.close()
     conn.close()
 
-    print(f"   Found {len(detections)} detections from model v1.0.0")
+    print(f"   Found {len(detections)} detections in the database")
 
     return detections
 
@@ -127,20 +127,20 @@ def reanalyze_with_hybrid(detector, ip_address, old_detection):
         'ip_address': ip_address,
         'logs_count': len(logs),
 
-        # Old model (v1.0.0)
-        'old_score': old_detection['anomaly_score'],
-        'old_risk_level': old_detection['risk_level'],
+        # Current detection in the DB
+        'old_score': float(old_detection['risk_score']),
+        'old_anomaly_type': old_detection['anomaly_type'],
         'old_blocked': old_detection['blocked'],
 
-        # New model (v2.0.0)
+        # New Hybrid model (re-analysis)
         'new_score': new_detection.get('anomaly_score', 0),
-        'new_risk_level': new_detection.get('risk_level', 'unknown'),
+        'new_anomaly_type': new_detection.get('anomaly_type', 'unknown'),
         'new_confidence': new_detection.get('confidence', 'unknown'),
         'new_is_anomaly': new_detection.get('is_anomaly', False),
 
         # Delta
-        'score_delta': new_detection.get('anomaly_score', 0) - old_detection['anomaly_score'],
-        'risk_changed': old_detection['risk_level'] != new_detection.get('risk_level', 'unknown'),
+        'score_delta': new_detection.get('anomaly_score', 0) - float(old_detection['risk_score']),
+        'type_changed': old_detection['anomaly_type'] != new_detection.get('anomaly_type', 'unknown'),
     }
 
     return comparison
 
@@ -148,14 +148,14 @@
 
 def main():
     print("\n" + "="*80)
-    print("  IDS MODEL COMPARISON - v1.0.0 vs v2.0.0")
+    print("  IDS MODEL COMPARISON - DB Current vs Hybrid Detector v2.0.0")
     print("="*80)
 
-    # Load old detections
+    # Load existing detections
     old_detections = load_old_detections(limit=50)
 
     if not old_detections:
-        print("\n❌ No detections from the old model found!")
+        print("\n❌ No detections found in the database!")
         return
 
     # Load the new Hybrid model
@@ -170,23 +170,23 @@ def main():
     print(f"   ✅ Hybrid Detector loaded (18 selected features)")
 
     # Re-analyze each IP with the new model
-    print(f"\n[3] Re-analyzing {len(old_detections)} IPs with the new model...")
+    print(f"\n[3] Re-analyzing {len(old_detections)} IPs with the new Hybrid model...")
     print("    (This may take a few minutes...)")
 
     comparisons = []
 
     for i, old_det in enumerate(old_detections):
-        ip = old_det['ip_address']
+        ip = old_det['source_ip']
 
         print(f"\n   [{i+1}/{len(old_detections)}] Analyzing IP: {ip}")
-        print(f"      Old:     score={old_det['anomaly_score']:.1f}, risk={old_det['risk_level']}, blocked={old_det['blocked']}")
+        print(f"      Current: score={float(old_det['risk_score']):.1f}, type={old_det['anomaly_type']}, blocked={old_det['blocked']}")
 
         comparison = reanalyze_with_hybrid(detector, ip, old_det)
 
         if comparison:
            comparisons.append(comparison)
-            print(f"      New:     score={comparison['new_score']:.1f}, risk={comparison['new_risk_level']}, confidence={comparison['new_confidence']}")
-            print(f"      Δ:      {comparison['score_delta']:+.1f} score")
+            print(f"      Hybrid:  score={comparison['new_score']:.1f}, type={comparison['new_anomaly_type']}, confidence={comparison['new_confidence']}")
+            print(f"      Δ:       {comparison['score_delta']:+.1f} score")
        else:
             print(f"      ⚠ No recent logs found for this IP")
 
@@ -204,11 +204,11 @@
     # Statistics
     print(f"\nIPs re-analyzed: {len(comparisons)}/{len(old_detections)}")
     print(f"\nAverage score:")
-    print(f"   Old model:      {df_comp['old_score'].mean():.1f}")
-    print(f"   New model:      {df_comp['new_score'].mean():.1f}")
-    print(f"   Average delta:  {df_comp['score_delta'].mean():+.1f}")
+    print(f"   Current detections: {df_comp['old_score'].mean():.1f}")
+    print(f"   Hybrid Detector:    {df_comp['new_score'].mean():.1f}")
+    print(f"   Average delta:      {df_comp['score_delta'].mean():+.1f}")
 
-    # False positives (the old model flagged them, the new one does not)
+    # False positives (high score in the DB, Hybrid says normal)
     false_positives = df_comp[
         (df_comp['old_score'] >= 80) &
         (~df_comp['new_is_anomaly'])
@@ -216,9 +216,9 @@
     print(f"\n🎯 Possible false positives reduced: {len(false_positives)}")
 
     if len(false_positives) > 0:
-        print("\n   IPs the old model blocked but the new model considers normal:")
+        print("\n   IPs with a high score in the DB but considered normal by the Hybrid Detector:")
         for _, row in false_positives.iterrows():
-            print(f"   • {row['ip_address']} (old={row['old_score']:.0f}, new={row['new_score']:.0f})")
+            print(f"   • {row['ip_address']} (DB={row['old_score']:.0f}, Hybrid={row['new_score']:.0f})")
 
     # Confirmed true positives
     true_positives = df_comp[
         (df_comp['old_score'] >= 80) &
@@ -226,18 +226,18 @@
         (df_comp['new_is_anomaly'])
     ]
 
-    print(f"\n✅ Anomalies confirmed by both models: {len(true_positives)}")
+    print(f"\n✅ Anomalies confirmed by the Hybrid Detector: {len(true_positives)}")
 
     # Confidence breakdown (new model only)
     if 'new_confidence' in df_comp.columns:
-        print(f"\n📊 Confidence level distribution (new model):")
+        print(f"\n📊 Confidence level distribution (Hybrid Detector):")
         conf_counts = df_comp['new_confidence'].value_counts()
         for conf, count in conf_counts.items():
            print(f"   • {conf}: {count} IPs")
 
-    # Risk level changes
-    risk_changes = df_comp[df_comp['risk_changed']]
-    print(f"\n🔄 IPs with a changed risk level: {len(risk_changes)}")
+    # Anomaly type changes
+    type_changes = df_comp[df_comp['type_changed']]
+    print(f"\n🔄 IPs with a changed anomaly type: {len(type_changes)}")
 
     # Top 10 largest score reductions
     print(f"\n📉 Top 10 score reductions (possible corrected FPs):")
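Note: for reference, a minimal standalone sketch of the query shape this patch switches to. It assumes only a DATABASE_URL environment variable and the psycopg2 package; the real script's get_db_connection() helper may be wired differently, and load_detections here is a hypothetical stand-in for load_old_detections.

import os

import psycopg2
from psycopg2.extras import RealDictCursor

def load_detections(limit=100):
    # Hypothetical stand-in for the script's load_old_detections():
    # pull the highest-risk detections using the columns that actually
    # exist in the detections table (source_ip, risk_score, anomaly_type,
    # log_count, last_seen, detected_at).
    conn = psycopg2.connect(os.environ["DATABASE_URL"])  # assumed env var
    cursor = conn.cursor(cursor_factory=RealDictCursor)
    cursor.execute(
        """
        SELECT d.id, d.source_ip, d.risk_score, d.anomaly_type,
               d.log_count, d.last_seen, d.blocked, d.detected_at
        FROM detections d
        ORDER BY d.risk_score DESC
        LIMIT %s
        """,
        (limit,),
    )
    rows = cursor.fetchall()
    cursor.close()
    conn.close()
    return rows

if __name__ == "__main__":
    for row in load_detections(limit=10):
        # risk_score is numeric(5,2), so psycopg2 returns Decimal; cast before math
        print(row["source_ip"], float(row["risk_score"]), row["anomaly_type"])

The float() cast mirrors the patch's float(old_detection['risk_score']): numeric columns come back as Decimal, which would otherwise break the Decimal-minus-float arithmetic in score_delta.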
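The false-positive / true-positive bucketing at the end of main() reduces to two boolean masks over the comparison DataFrame. A self-contained sketch with made-up rows, reusing the column names the patch introduces (old_score, new_score, new_is_anomaly, type_changed):

import pandas as pd

# Made-up comparison rows shaped like the dicts built in reanalyze_with_hybrid()
df_comp = pd.DataFrame([
    {"ip_address": "203.0.113.7",  "old_score": 95.0, "new_score": 12.0,
     "new_is_anomaly": False, "type_changed": True},
    {"ip_address": "198.51.100.9", "old_score": 88.0, "new_score": 91.0,
     "new_is_anomaly": True,  "type_changed": False},
])
df_comp["score_delta"] = df_comp["new_score"] - df_comp["old_score"]

# High score in the DB but the re-analysis says normal -> candidate false positive
false_positives = df_comp[(df_comp["old_score"] >= 80) & (~df_comp["new_is_anomaly"])]

# High score in the DB and still flagged as anomalous -> confirmed true positive
true_positives = df_comp[(df_comp["old_score"] >= 80) & (df_comp["new_is_anomaly"])]

print(f"candidate FPs: {len(false_positives)}, confirmed TPs: {len(true_positives)}")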