Adjust model comparison script to correctly process network logs
Correct the logic in `compare_models.py` to pass raw network logs to the detection method, ensuring correct feature extraction and preventing a 'timestamp' KeyError.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: ecdb452a-13bf-4c0b-8da9-eebbafd63834
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/RJGlbTt
This commit is contained in:
parent
27499869ac
commit
2d7185cdbc
@@ -0,0 +1,55 @@
|
|||||||
|
python compare_models.py
|
||||||
|
[WARNING] Extended Isolation Forest not available, using standard IF
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
IDS MODEL COMPARISON - DB Current vs Hybrid Detector v2.0.0
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
[1] Caricamento detection esistenti dal database...
|
||||||
|
Trovate 50 detection nel database
|
||||||
|
|
||||||
|
[2] Caricamento nuovo Hybrid Detector (v2.0.0)...
|
||||||
|
[HYBRID] Ensemble classifier loaded
|
||||||
|
[HYBRID] Models loaded (version: latest)
|
||||||
|
[HYBRID] Selected features: 18/25
|
||||||
|
[HYBRID] Mode: Hybrid (IF + Ensemble)
|
||||||
|
✅ Hybrid Detector caricato (18 feature selezionate)
|
||||||
|
|
||||||
|
[3] Rianalisi di 50 IP con nuovo modello Hybrid...
|
||||||
|
(Questo può richiedere alcuni minuti...)
|
||||||
|
|
||||||
|
[1/50] Analisi IP: 185.203.25.138
|
||||||
|
Current: score=100.0, type=ddos, blocked=False
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/opt/ids/python_ml/venv/lib64/python3.11/site-packages/pandas/core/indexes/base.py", line 3790, in get_loc
|
||||||
|
return self._engine.get_loc(casted_key)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "index.pyx", line 152, in pandas._libs.index.IndexEngine.get_loc
|
||||||
|
File "index.pyx", line 181, in pandas._libs.index.IndexEngine.get_loc
|
||||||
|
File "pandas/_libs/hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
|
||||||
|
File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
|
||||||
|
KeyError: 'timestamp'
|
||||||
|
|
||||||
|
The above exception was the direct cause of the following exception:
|
||||||
|
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/opt/ids/python_ml/compare_models.py", line 265, in <module>
|
||||||
|
main()
|
||||||
|
File "/opt/ids/python_ml/compare_models.py", line 184, in main
|
||||||
|
comparison = reanalyze_with_hybrid(detector, ip, old_det)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/opt/ids/python_ml/compare_models.py", line 118, in reanalyze_with_hybrid
|
||||||
|
result = detector.detect(ip_features)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/opt/ids/python_ml/ml_hybrid_detector.py", line 507, in detect
|
||||||
|
features_df = self.extract_features(logs_df)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/opt/ids/python_ml/ml_hybrid_detector.py", line 98, in extract_features
|
||||||
|
logs_df['timestamp'] = pd.to_datetime(logs_df['timestamp'])
|
||||||
|
~~~~~~~^^^^^^^^^^^^^
|
||||||
|
File "/opt/ids/python_ml/venv/lib64/python3.11/site-packages/pandas/core/frame.py", line 3893, in __getitem__
|
||||||
|
indexer = self.columns.get_loc(key)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
File "/opt/ids/python_ml/venv/lib64/python3.11/site-packages/pandas/core/indexes/base.py", line 3797, in get_loc
|
||||||
|
raise KeyError(key) from err
|
||||||
|
KeyError: 'timestamp'
|
||||||
@@ -105,21 +105,14 @@ def reanalyze_with_hybrid(detector, ip_address, old_detection):
|
|||||||
|
|
||||||
df = pd.DataFrame(logs)
|
df = pd.DataFrame(logs)
|
||||||
|
|
||||||
# Feature extraction (come nel detector)
|
# Il metodo detect() fa già l'extraction delle feature internamente
|
||||||
features_df = detector.extract_features(df)
|
# Passiamo direttamente i log grezzi
|
||||||
|
result = detector.detect(df, mode='all') # mode='all' per vedere tutti i risultati
|
||||||
if len(features_df) == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Prendi le feature dell'IP aggregato
|
|
||||||
ip_features = features_df.iloc[0:1]
|
|
||||||
|
|
||||||
# Rianalizza con nuovo modello
|
|
||||||
result = detector.detect(ip_features)
|
|
||||||
|
|
||||||
if not result or len(result) == 0:
|
if not result or len(result) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Il detector raggruppa per source_ip, quindi dovrebbe esserci 1 risultato
|
||||||
new_detection = result[0]
|
new_detection = result[0]
|
||||||
|
|
||||||
# Confronto
|
# Confronto
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user