From 3425521215eba9ba7a86be1b4948805f713a00f9 Mon Sep 17 00:00:00 2001 From: marco370 <48531002-marco370@users.noreply.replit.com> Date: Fri, 2 Jan 2026 11:48:33 +0000 Subject: [PATCH] Update list fetching to handle new Spamhaus format and IP matching Update Spamhaus parser to support NDJSON format and fix IP matching errors by ensuring database migrations are applied. Replit-Commit-Author: Agent Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528 Replit-Commit-Checkpoint-Type: full_checkpoint Replit-Commit-Event-Id: 11e93061-1fe5-4624-8362-9202aff893d7 Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/449cf7c4-c97a-45ae-8234-e5c5b8d6a84f/7a657272-55ba-4a79-9a2e-f1ed9bc7a528/rDib6Pq --- .replit | 16 ------ ...50-no-pager-Jan-02-12-30_1767354386554.txt | 51 +++++++++++++++++++ python_ml/list_fetcher/parsers.py | 17 +++++-- 3 files changed, 65 insertions(+), 19 deletions(-) create mode 100644 attached_assets/Pasted-ournalctl-u-ids-list-fetcher-n-50-no-pager-Jan-02-12-30_1767354386554.txt diff --git a/.replit b/.replit index aa41490..36f7b53 100644 --- a/.replit +++ b/.replit @@ -14,22 +14,6 @@ run = ["npm", "run", "start"] localPort = 5000 externalPort = 80 -[[ports]] -localPort = 41303 -externalPort = 3002 - -[[ports]] -localPort = 43471 -externalPort = 3003 - -[[ports]] -localPort = 43803 -externalPort = 3000 - -[[ports]] -localPort = 45059 -externalPort = 3001 - [env] PORT = "5000" diff --git a/attached_assets/Pasted-ournalctl-u-ids-list-fetcher-n-50-no-pager-Jan-02-12-30_1767354386554.txt b/attached_assets/Pasted-ournalctl-u-ids-list-fetcher-n-50-no-pager-Jan-02-12-30_1767354386554.txt new file mode 100644 index 0000000..bb8eb8a --- /dev/null +++ b/attached_assets/Pasted-ournalctl-u-ids-list-fetcher-n-50-no-pager-Jan-02-12-30_1767354386554.txt @@ -0,0 +1,51 @@ +ournalctl -u ids-list-fetcher -n 50 --no-pager +Jan 02 12:30:01 ids.alfacom.it ids-list-fetcher[5571]: Cleaned invalid detections: 0 +Jan 02 12:30:01 ids.alfacom.it ids-list-fetcher[5571]: Skipped (whitelisted): 0 +Jan 02 12:30:01 ids.alfacom.it ids-list-fetcher[5571]: ============================================================ +Jan 02 12:30:01 ids.alfacom.it systemd[1]: ids-list-fetcher.service: Deactivated successfully. +Jan 02 12:30:01 ids.alfacom.it systemd[1]: Finished IDS Public Lists Fetcher Service. +Jan 02 12:40:01 ids.alfacom.it systemd[1]: Starting IDS Public Lists Fetcher Service... +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: [2026-01-02 12:40:01] PUBLIC LISTS SYNC +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: Found 2 enabled lists +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: [12:40:01] Downloading Spamhaus from https://www.spamhaus.org/drop/drop_v4.json... +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: [12:40:01] Downloading AWS from https://ip-ranges.amazonaws.com/ip-ranges.json... +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: [12:40:01] Parsing AWS... +Jan 02 12:40:01 ids.alfacom.it ids-list-fetcher[5730]: [12:40:01] Found 9548 IPs, syncing to database... +Jan 02 12:40:02 ids.alfacom.it ids-list-fetcher[5730]: [12:40:02] ✓ AWS: +9511 -0 ~0 +Jan 02 12:40:02 ids.alfacom.it ids-list-fetcher[5730]: [12:40:02] Parsing Spamhaus... +Jan 02 12:40:02 ids.alfacom.it ids-list-fetcher[5730]: [12:40:02] ✗ Spamhaus: No valid IPs found in list +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: SYNC SUMMARY +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Success: 1/2 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Errors: 1/2 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Total IPs Added: 9511 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Total IPs Removed: 0 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: RUNNING MERGE LOGIC +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ERROR:merge_logic:Failed to cleanup detections: operator does not exist: inet = text +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: LINE 9: d.source_ip::inet = wl.ip_inet +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ^ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: HINT: No operator matches the given name and argument types. You might need to add explicit type casts. +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ERROR:merge_logic:Failed to sync detections: operator does not exist: text <<= text +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: LINE 30: OR bl.ip_inet <<= wl.ip_inet +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ^ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: HINT: No operator matches the given name and argument types. You might need to add explicit type casts. +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Traceback (most recent call last): +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: File "/opt/ids/python_ml/merge_logic.py", line 264, in sync_public_blacklist_detections +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: cur.execute(""" +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: psycopg2.errors.UndefinedFunction: operator does not exist: text <<= text +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: LINE 30: OR bl.ip_inet <<= wl.ip_inet +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ^ +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: HINT: No operator matches the given name and argument types. You might need to add explicit type casts. +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Merge Logic Stats: +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Created detections: 0 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Cleaned invalid detections: 0 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: Skipped (whitelisted): 0 +Jan 02 12:40:03 ids.alfacom.it ids-list-fetcher[5730]: ============================================================ +Jan 02 12:40:03 ids.alfacom.it systemd[1]: ids-list-fetcher.service: Deactivated successfully. +Jan 02 12:40:03 ids.alfacom.it systemd[1]: Finished IDS Public Lists Fetcher Service. \ No newline at end of file diff --git a/python_ml/list_fetcher/parsers.py b/python_ml/list_fetcher/parsers.py index 7e63f3a..50c94b0 100644 --- a/python_ml/list_fetcher/parsers.py +++ b/python_ml/list_fetcher/parsers.py @@ -41,8 +41,8 @@ class SpamhausParser(ListParser): def parse(content: str) -> Set[tuple[str, Optional[str]]]: """ Parse Spamhaus DROP format: - ; Comment lines start with semicolon - 1.2.3.0/24 ; SBL12345 + - NDJSON (new): {"cidr":"1.2.3.0/24","sblid":"SBL12345","rir":"apnic"} + - Text (old): 1.2.3.0/24 ; SBL12345 """ ips = set() lines = content.strip().split('\n') @@ -54,7 +54,18 @@ class SpamhausParser(ListParser): if not line or line.startswith(';') or line.startswith('#'): continue - # Extract IP/CIDR before comment + # Try NDJSON format first (new Spamhaus format) + if line.startswith('{'): + try: + data = json.loads(line) + cidr = data.get('cidr') + if cidr and ListParser.validate_ip(cidr): + ips.add(ListParser.normalize_cidr(cidr)) + continue + except json.JSONDecodeError: + pass + + # Fallback: old text format parts = line.split(';') if parts: ip_part = parts[0].strip()