# ids.alfacom.it/extracted_idf/test_gpu_almalinux.py

#!/usr/bin/env python3
"""
🚀 TEST GPU LIBRARIES - AlmaLinux + Tesla M60 CC 5.2
Verifica completa dell'installazione GPU per il sistema di produzione
"""

import sys
import time
import os

# Banner: identify the run, the interpreter and the kernel it is running on.
for _banner_line in (
    "🚀 TEST GPU LIBRARIES - AlmaLinux + Tesla M60",
    "=" * 60,
    f"🔧 Python: {sys.version}",
    f"🔧 OS: {os.uname().sysname} {os.uname().release}",
):
    print(_banner_line)

# Tesla M60 configuration.
# These variables are exported before any GPU library is imported, so the
# libraries see them when they initialise.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '0',
    'TF_GPU_ALLOCATOR': 'legacy',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'RAPIDS_NO_INITIALIZE': '1',
})
print("🔧 Tesla M60 environment configurato")

# Test 1: base CUDA system
# Runs `nvidia-smi` and echoes the first output line mentioning the Tesla M60
# and the first line mentioning the driver version. Any failure to launch the
# tool is reported, never raised.
print("\n⚡ TEST 1: SISTEMA CUDA BASE")
try:
    import subprocess
    smi = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    if smi.returncode != 0:
        print("❌ nvidia-smi non funziona")
    else:
        report_rows = smi.stdout.split('\n')

        # First row naming the card, if any (nothing is printed otherwise).
        gpu_row = next((row for row in report_rows if 'Tesla M60' in row), None)
        if gpu_row is not None:
            print(f"✅ Tesla M60 rilevata: {gpu_row.strip()}")

        # First row carrying the driver version, if any.
        driver_row = next((row for row in report_rows if 'Driver Version' in row), None)
        if driver_row is not None:
            print(f"✅ {driver_row.strip()}")
except Exception as err:
    print(f"⚠️ CUDA check error: {err}")

# Test 2: cuDF + CuPy
# Builds a 100k-row GPU DataFrame, runs a few aggregations on it and reports
# the CuPy memory-pool usage. Sets CUDF_AVAILABLE to True only when every step
# succeeded; `df` and `cp` are left at module level for the later tests.
print("\n⚡ TEST 2: CuDF + CuPy")
try:
    import cudf
    import cupy as cp

    # Basic cuDF operations
    print("🔧 Creazione DataFrame CuDF...")
    df = cudf.DataFrame({
        'id': range(100000),
        'values': cp.random.random(100000),
        'categories': cp.random.randint(0, 10, 100000)
    })

    # The column has to be selected with df['values']: attribute access
    # (df.values) resolves to the DataFrame.values property, i.e. the whole
    # frame as one array, so the sum/mean would also include the 'id' and
    # 'categories' columns.
    values_col = df['values']
    result_sum = values_col.sum()
    result_mean = values_col.mean()
    result_groupby = df.groupby('categories')['values'].mean()

    print(f"✅ CuDF: {len(df):,} record processati")
    print(f"   Sum: {result_sum:.2f}")
    print(f"   Mean: {result_mean:.4f}")
    print(f"   Groupby operations: {len(result_groupby)} gruppi")

    # Memory currently handed out by CuPy's default memory pool
    mempool = cp.get_default_memory_pool()
    used_mb = mempool.used_bytes() / 1024**2
    print(f"   GPU Memory CuDF: {used_mb:.1f}MB")

    CUDF_AVAILABLE = True

except ImportError as e:
    # cuDF or CuPy is not installed.
    print(f"❌ CuDF non disponibile: {e}")
    CUDF_AVAILABLE = False
except Exception as e:
    # Libraries import but a GPU operation failed.
    print(f"⚠️ CuDF error: {e}")
    CUDF_AVAILABLE = False

# Test 3: cuML machine learning
# Fits a handful of cuML estimators on GPU data. When Test 2 succeeded the
# data comes from its DataFrame `df`; otherwise a small random matrix is used.
# Sets CUML_AVAILABLE to True only when every step succeeded.
print("\n⚡ TEST 3: CuML MACHINE LEARNING")
try:
    import cuml
    # NOTE(review): confirm that the installed cuML release actually ships
    # IsolationForest and LocalOutlierFactor; if it does not, these imports
    # raise ImportError and the whole test is reported as "non disponibile".
    from cuml.ensemble import IsolationForest
    from cuml.neighbors import LocalOutlierFactor
    from cuml.preprocessing import StandardScaler
    from cuml.cluster import DBSCAN
    # Imported here as well: if Test 2 failed on `import cudf`, its
    # `import cupy as cp` never ran and `cp` would be undefined below.
    import cupy as cp

    if CUDF_AVAILABLE:
        print("🔧 Preparazione dati ML...")
        # Reuse the data from the previous test. Columns are selected with
        # brackets because df.values is the DataFrame.values property (an
        # array of the whole frame), not the 'values' column.
        X = cp.stack([df['values'].values, df['id'].values / 100000.0], axis=1)
        print(f"   Dataset shape: {X.shape}")

        # Isolation Forest
        print("🔧 Test Isolation Forest GPU...")
        iso_forest = IsolationForest(n_estimators=100, contamination=0.05)
        iso_forest.fit(X)
        iso_pred = iso_forest.predict(X)
        iso_anomalies = cp.sum(iso_pred == -1)
        print(f"✅ Isolation Forest: {iso_anomalies} anomalie su {len(X):,} campioni")

        # LOF (on a reduced sample to limit memory use)
        print("🔧 Test LOF GPU...")
        sample_size = min(10000, len(X))
        X_sample = X[:sample_size]
        lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
        lof_pred = lof.fit_predict(X_sample)
        lof_anomalies = cp.sum(lof_pred == -1)
        print(f"✅ LOF: {lof_anomalies} anomalie su {len(X_sample):,} campioni")

        # StandardScaler
        print("🔧 Test StandardScaler GPU...")
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_sample)
        print(f"✅ StandardScaler: Mean={cp.mean(X_scaled):.6f}, Std={cp.std(X_scaled):.6f}")

        # DBSCAN
        print("🔧 Test DBSCAN GPU...")
        dbscan = DBSCAN(eps=0.5, min_samples=5)
        dbscan_labels = dbscan.fit_predict(X_scaled)
        unique_labels = cp.unique(dbscan_labels)
        # Label -1 marks noise points, not a cluster, so it is left out of
        # the cluster count.
        n_clusters = int(cp.sum(unique_labels != -1))
        print(f"✅ DBSCAN: {n_clusters} cluster trovati")

    else:
        # Basic test without cuDF: random host data copied to the GPU
        print("🔧 Test CuML con dati numpy...")
        import numpy as np
        X_cpu = np.random.random((10000, 5)).astype(np.float32)
        X_gpu = cp.asarray(X_cpu)

        iso_forest = IsolationForest(n_estimators=50, contamination=0.1)
        iso_forest.fit(X_gpu)
        iso_pred = iso_forest.predict(X_gpu)
        anomalies = cp.sum(iso_pred == -1)
        print(f"✅ CuML base test: {anomalies} anomalie")

    # CuPy memory-pool usage after the ML run
    mempool = cp.get_default_memory_pool()
    used_mb = mempool.used_bytes() / 1024**2
    print(f"   GPU Memory dopo ML: {used_mb:.1f}MB")

    CUML_AVAILABLE = True

except ImportError as e:
    # cuML (or one of the requested estimators) is not installed.
    print(f"❌ CuML non disponibile: {e}")
    CUML_AVAILABLE = False
except Exception as e:
    # Libraries import but a GPU operation failed.
    print(f"⚠️ CuML error: {e}")
    CUML_AVAILABLE = False

# Test 4: TensorFlow on GPU
# Lists the GPUs TensorFlow can see, then times a matrix multiplication and a
# short Keras training run on /GPU:0. TENSORFLOW_AVAILABLE ends up True only
# when TensorFlow imports, at least one GPU is visible and the tests ran.
print("\n⚡ TEST 4: TENSORFLOW GPU")
try:
    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    print(f"✅ TensorFlow {tf.__version__}")
    print(f"   GPU devices rilevati: {len(gpus)}")

    if gpus:
        # Enable memory growth on the first GPU. A RuntimeError here is
        # reported as a warning and the test carries on.
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
            print(f"✅ Memory growth configurato per {gpus[0]}")
        except RuntimeError as e:
            print(f"⚠️ Memory growth warning: {e}")

        # Computation on the GPU
        print("🔧 Test computation TensorFlow GPU...")
        with tf.device('/GPU:0'):
            # Matrix operations
            a = tf.random.normal([2000, 2000])
            b = tf.random.normal([2000, 2000])

            start_time = time.perf_counter()
            c = tf.matmul(a, b)
            # .numpy() copies the scalar to the host, which forces the queued
            # GPU work to finish before the clock is stopped; without it the
            # measurement may only cover the time needed to enqueue the ops.
            result = tf.reduce_sum(c).numpy()
            gpu_time = time.perf_counter() - start_time

            print(f"✅ Matrix multiplication 2000x2000: {gpu_time:.3f}s")
            print(f"   Result sum: {result:.2f}")

            # Neural network test
            print("🔧 Test Neural Network GPU...")
            model = tf.keras.Sequential([
                tf.keras.layers.Dense(64, activation='relu', input_shape=(100,)),
                tf.keras.layers.Dense(32, activation='relu'),
                tf.keras.layers.Dense(1, activation='sigmoid')
            ])

            model.compile(optimizer='adam', loss='binary_crossentropy')

            # Random training data: 1000 samples, 100 features, targets in [0, 1)
            X_test = tf.random.normal([1000, 100])
            y_test = tf.random.uniform([1000, 1])

            start_time = time.perf_counter()
            model.fit(X_test, y_test, epochs=5, verbose=0)
            train_time = time.perf_counter() - start_time

            print(f"✅ Neural Network training: {train_time:.3f}s")

    else:
        print("❌ Nessuna GPU TensorFlow disponibile")

    # The summary lists this component as "TensorFlow GPU", so an importable
    # TensorFlow with no visible GPU must not count as available.
    TENSORFLOW_AVAILABLE = bool(gpus)

except ImportError as e:
    # TensorFlow is not installed.
    print(f"❌ TensorFlow non disponibile: {e}")
    TENSORFLOW_AVAILABLE = False
except Exception as e:
    # TensorFlow imports but a GPU operation failed.
    print(f"⚠️ TensorFlow error: {e}")
    TENSORFLOW_AVAILABLE = False

# Test 5: Tesla M60 memory management
# Reports device memory usage and then tries to allocate five 500MB CuPy
# arrays, printing the pool usage after each one. Only runs when Test 2
# succeeded (CUDF_AVAILABLE), which also guarantees `cp` is imported.
print("\n⚡ TEST 5: MEMORY MANAGEMENT TESLA M60")
try:
    if CUDF_AVAILABLE:
        mempool = cp.get_default_memory_pool()
        # Ask the CUDA runtime for the real free/total figures instead of
        # assuming 8GB and counting only CuPy's pool: memory taken by other
        # libraries (TensorFlow, cuML) is not visible in the pool statistics.
        free_bytes, total_bytes = cp.cuda.runtime.memGetInfo()
        used_bytes = total_bytes - free_bytes
        pool_used_bytes = mempool.used_bytes()

        print("📊 Tesla M60 Memory Status:")
        print(f"   Totale: {total_bytes/1024**3:.1f}GB")
        print(f"   Utilizzata: {used_bytes/1024**2:.1f}MB ({used_bytes/total_bytes*100:.1f}%)")
        print(f"   Libera: {free_bytes/1024**2:.1f}MB")
        print(f"   CuPy pool: {pool_used_bytes/1024**2:.1f}MB")

        # Memory allocation stress test
        print("🔧 Test memory allocation stress...")
        test_arrays = []
        try:
            for i in range(5):
                # 500MB of float32 per step. The array is appended directly,
                # without a separate name, so that no stray reference keeps
                # the last block alive after the cleanup below.
                test_arrays.append(
                    cp.random.random((500 * 1024 * 1024 // 4,), dtype=cp.float32)
                )
                current_used = mempool.used_bytes()
                print(f"   Allocation {i+1}: {current_used/1024**2:.1f}MB used")
        except Exception as e:
            # Running out of memory is an expected outcome of the stress test.
            print(f"   Memory limit raggiunto: {e}")
        finally:
            # Drop every reference first, then hand the pooled blocks back.
            test_arrays.clear()
            del test_arrays
            mempool.free_all_blocks()
            print("✅ Memory cleanup completato")

except Exception as e:
    print(f"⚠️ Memory test error: {e}")

# FINAL SUMMARY
# Prints the availability of each component as recorded by the tests above
# (CUDF_AVAILABLE, CUML_AVAILABLE, TENSORFLOW_AVAILABLE) and a recommendation
# matching the combination that was found.
print("\n" + "="*60)
print("🎉 RIEPILOGO TEST ALMALINUX + TESLA M60")
print("="*60)

components = {
    "CuDF + CuPy": CUDF_AVAILABLE,
    "CuML": CUML_AVAILABLE,
    "TensorFlow GPU": TENSORFLOW_AVAILABLE
}

for component, available in components.items():
    status = "✅ DISPONIBILE" if available else "❌ NON DISPONIBILE"
    print(f"   {component}: {status}")

available_names = [name for name, ok in components.items() if ok]
missing_names = [name for name, ok in components.items() if not ok]

# Recommendations
print("\n📋 RACCOMANDAZIONI:")
if all(components.values()):
    print("✅ Configurazione OTTIMALE per 1M+ record!")
    print("   - Usa CuDF per data loading (1M+ record)")
    print("   - Usa CuML per ML algorithms")
    print("   - TensorFlow GPU per neural networks")
    print("   - Batch size consigliato: 100K record")
elif CUDF_AVAILABLE and CUML_AVAILABLE:
    print("✅ Configurazione BUONA per GPU processing")
    print("   - Usa CuDF + CuML per processing veloce")
    print("   - TensorFlow fallback per neural networks")
elif available_names == ["TensorFlow GPU"]:
    print("⚠️ Configurazione LIMITATA")
    print("   - Solo TensorFlow GPU disponibile")
    print("   - Batch size consigliato: 500K record max")
elif available_names:
    # Some other partial combination (e.g. only cuDF, or cuDF + TensorFlow):
    # name what is present and what is missing rather than claiming that
    # only TensorFlow, or nothing at all, is installed.
    print("⚠️ Configurazione PARZIALE")
    print(f"   - Disponibili: {', '.join(available_names)}")
    print(f"   - Mancanti: {', '.join(missing_names)}")
else:
    print("❌ FALLBACK CPU ONLY")
    print("   - Tutte le librerie GPU mancanti")
    print("   - Performance limitata per 1M+ record")

# Only announce readiness when at least one GPU component actually works.
if available_names:
    print("\n🚀 Tesla M60 pronta per processing DDoS Detection!")
else:
    print("\n❌ Tesla M60 NON pronta: nessuna libreria GPU utilizzabile")