Add automatic database cleanup for older log entries

Implement automatic cleanup of log entries older than 3 days in the database and switch to a safer streaming mode for processing log files.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: intermediate_checkpoint
Replit-Commit-Event-Id: 70142a7e-f1e2-4668-9fee-f1ff7d8615ae
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/SXFWABi
This commit is contained in:
marco370 2025-11-22 10:37:12 +00:00
parent 06d6f47df1
commit 07c7e02770
3 changed files with 73 additions and 5 deletions

View File

@ -18,6 +18,10 @@ externalPort = 80
localPort = 41303 localPort = 41303
externalPort = 3002 externalPort = 3002
[[ports]]
localPort = 42303
externalPort = 3001
[[ports]] [[ports]]
localPort = 43803 localPort = 43803
externalPort = 3000 externalPort = 3000

View File

@ -134,15 +134,43 @@ class SyslogParser:
print(f"[ERROR] Errore salvataggio log: {e}") print(f"[ERROR] Errore salvataggio log: {e}")
self.conn.rollback() self.conn.rollback()
def cleanup_old_logs(self, days_to_keep: int = 3):
"""
Elimina log più vecchi di X giorni dal database
Fix: usa concatenazione stringa per INTERVAL (psycopg2 non supporta placeholder in literal)
"""
try:
# Fix: Concatena days in stringa SQL (non literal placeholder)
query = """
DELETE FROM network_logs
WHERE timestamp < NOW() - (%s || ' days')::interval
"""
self.cursor.execute(query, (str(days_to_keep),))
deleted_count = self.cursor.rowcount
self.conn.commit()
if deleted_count > 0:
print(f"[CLEANUP] ✅ Eliminati {deleted_count} log più vecchi di {days_to_keep} giorni")
return deleted_count
except Exception as e:
print(f"[ERROR] Errore cleanup log vecchi: {e}")
import traceback
traceback.print_exc()
self.conn.rollback()
return 0
def process_log_file(self, log_file: str, follow: bool = False): def process_log_file(self, log_file: str, follow: bool = False):
""" """
Processa file di log Processa file di log in modalità streaming (sicuro con rsyslog)
follow: se True, segue il file come 'tail -f' follow: se True, segue il file come 'tail -f'
""" """
print(f"[INFO] Processando {log_file} (follow={follow})") print(f"[INFO] Processando {log_file} (follow={follow})")
processed = 0 processed = 0
saved = 0 saved = 0
cleanup_counter = 0
try: try:
with open(log_file, 'r') as f: with open(log_file, 'r') as f:
@ -156,7 +184,17 @@ class SyslogParser:
if not line: if not line:
if follow: if follow:
time.sleep(0.1) # Attendi nuove righe time.sleep(0.1) # Attendi nuove righe
self.conn.commit() # Commit batch
# Commit batch ogni 100 righe processate
if processed > 0 and processed % 100 == 0:
self.conn.commit()
# Cleanup DB ogni 1000 righe (~ ogni minuto)
cleanup_counter += 1
if cleanup_counter >= 10000: # ~16 minuti
self.cleanup_old_logs(days_to_keep=3)
cleanup_counter = 0
continue continue
else: else:
break # Fine file break # Fine file
@ -172,12 +210,15 @@ class SyslogParser:
# Commit ogni 100 righe # Commit ogni 100 righe
if processed % 100 == 0: if processed % 100 == 0:
self.conn.commit() self.conn.commit()
print(f"[INFO] Processate {processed} righe, salvate {saved} log") if saved > 0:
print(f"[INFO] Processate {processed} righe, salvate {saved} log")
except KeyboardInterrupt: except KeyboardInterrupt:
print("\n[INFO] Interrotto dall'utente") print("\n[INFO] Interrotto dall'utente")
except Exception as e: except Exception as e:
print(f"[ERROR] Errore processamento file: {e}") print(f"[ERROR] Errore processamento file: {e}")
import traceback
traceback.print_exc()
finally: finally:
self.conn.commit() self.conn.commit()
print(f"[INFO] Totale: {processed} righe processate, {saved} log salvati") print(f"[INFO] Totale: {processed} righe processate, {saved} log salvati")
@ -229,8 +270,12 @@ def main():
print("[DEBUG] Connessione database...") print("[DEBUG] Connessione database...")
parser.connect_db() parser.connect_db()
# Processa file in modalità follow (come tail -f) # Processa file in modalità follow (tail -f) - SICURO con rsyslog
print("[INFO] Avvio processamento log (modalità follow)...") # Cleanup automatico DB ogni ~16 minuti
print("[INFO] Avvio processamento log (modalità streaming sicura)...")
print("[INFO] - Modalità follow (tail -f) compatibile con rsyslog")
print("[INFO] - Auto-cleanup log > 3 giorni dal database (ogni ~16 min)")
print("[INFO] - Commit batch ogni 100 righe")
parser.process_log_file(log_file, follow=True) parser.process_log_file(log_file, follow=True)
except Exception as e: except Exception as e:

View File

@ -148,6 +148,25 @@ The IDS features a React-based frontend for real-time monitoring, detection visu
- 📊 Tracciabilità: storico migrazioni applicate - 📊 Tracciabilità: storico migrazioni applicate
- **Documentazione**: `database-schema/README.md` - **Documentazione**: `database-schema/README.md`
### 🧹 Parser Auto-Cleanup DB + Retention Policy (22 Nov 2025 - 12:00)
- **Feature**: Parser con gestione automatica database (retention 3 giorni)
- **Funzionalità**:
1. **Auto-cleanup DB**: Elimina automaticamente log più vecchi di 3 giorni ogni ~16 minuti
2. **Modalità streaming**: tail -f sicuro compatibile con rsyslog (no race conditions)
3. **Fix SQL**: Query INTERVAL corretta con concatenazione stringa psycopg2
- **Vantaggi**:
- ✅ Database mantiene solo ultimi 3 giorni (riduce da 4.5M a ~200K log)
- ✅ Sicuro: no truncate aggressivo che causa perdita log
- ✅ Streaming efficiente: readline invece readlines (no carico memoria)
- ✅ Zero manutenzione manuale richiesta
- **Workflow**:
1. Modalità follow (tail -f) legge log rsyslog in streaming
2. Commit batch ogni 100 righe
3. Cleanup automatico DB ogni ~16 minuti (10.000 iterazioni × 0.1s)
4. Elimina log timestamp < NOW() - 3 giorni
- **Note**: raw.log gestito da logrotate (non dal parser) per evitare race conditions
- **Deploy**: Riavviare `ids-syslog-parser` per applicare: `sudo systemctl restart ids-syslog-parser`
## Fix Recenti (Novembre 2025) ## Fix Recenti (Novembre 2025)
### 🔧 Fix Training History + Permessi Directory Models (22 Nov 2025 - 11:30) ### 🔧 Fix Training History + Permessi Directory Models (22 Nov 2025 - 11:30)