ids.alfacom.it/extracted_idf/test_gpu_almalinux.py
marco370 0bfe3258b5 Saved progress at the end of the loop
Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 1c71ce6e-1a3e-4f53-bb5d-77cdd22b8ea3
2025-11-11 09:15:10 +00:00

290 lines
10 KiB
Python

#!/usr/bin/env python3
"""
🚀 TEST GPU LIBRARIES - AlmaLinux + Tesla M60 CC 5.2
Verifica completa dell'installazione GPU per il sistema di produzione
"""
import sys
import time
import os
# Banner: identify the interpreter and kernel this check is running on.
banner_rule = "=" * 60
print("🚀 TEST GPU LIBRARIES - AlmaLinux + Tesla M60")
print(banner_rule)
print(f"🔧 Python: {sys.version}")
kernel = os.uname()
print(f"🔧 OS: {kernel.sysname} {kernel.release}")

# Tesla M60 configuration: exported to the process environment here, ahead of
# the GPU library imports performed by the tests further down the script.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '0',
    'TF_GPU_ALLOCATOR': 'legacy',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'RAPIDS_NO_INITIALIZE': '1',
})
print("🔧 Tesla M60 environment configurato")
# Test 1: base CUDA system (driver + GPU visibility through nvidia-smi)
print("\n⚡ TEST 1: SISTEMA CUDA BASE")


def first_line_containing(lines, needle):
    """Return the first entry of *lines* that contains *needle*, stripped.

    Returns None when no entry matches (including when *lines* is empty).
    """
    return next((line.strip() for line in lines if needle in line), None)


try:
    import subprocess

    # The timeout keeps a hung driver from blocking the whole script; a
    # TimeoutExpired is reported by the generic handler below.
    result = subprocess.run(
        ['nvidia-smi'], capture_output=True, text=True, timeout=30
    )
    if result.returncode == 0:
        lines = result.stdout.split('\n')

        gpu_line = first_line_containing(lines, 'Tesla M60')
        if gpu_line is not None:
            print(f"✅ Tesla M60 rilevata: {gpu_line}")
        else:
            # nvidia-smi works but the expected card is not listed: say so
            # instead of staying silent.
            print("⚠️ Tesla M60 non trovata nell'output di nvidia-smi")

        driver_line = first_line_containing(lines, 'Driver Version')
        if driver_line is not None:
            print(driver_line)
    else:
        print("❌ nvidia-smi non funziona")
except Exception as e:
    # Also reached when the nvidia-smi binary is missing (FileNotFoundError).
    print(f"⚠️ CUDA check error: {e}")
# Test 2: cuDF + CuPy
print("\n⚡ TEST 2: CuDF + CuPy")
try:
    import cudf
    import cupy as cp

    # Build a small DataFrame from CuPy-generated columns
    print("🔧 Creazione DataFrame CuDF...")
    df = cudf.DataFrame({
        'id': range(100000),
        'values': cp.random.random(100000),
        'categories': cp.random.randint(0, 10, 100000),
    })

    # Basic operations.
    # The column must be selected with brackets: attribute access
    # (`df.values`) resolves to the DataFrame `.values` property, i.e. every
    # column as one array, so sum/mean would also include 'id' and
    # 'categories'.
    values_col = df['values']
    result_sum = values_col.sum()
    result_mean = values_col.mean()
    result_groupby = df.groupby('categories')['values'].mean()
    print(f"✅ CuDF: {len(df):,} record processati")
    print(f" Sum: {result_sum:.2f}")
    print(f" Mean: {result_mean:.4f}")
    print(f" Groupby operations: {len(result_groupby)} gruppi")

    # Memory info, as reported by CuPy's default memory pool
    mempool = cp.get_default_memory_pool()
    used_mb = mempool.used_bytes() / 1024**2
    print(f" GPU Memory CuDF: {used_mb:.1f}MB")
    CUDF_AVAILABLE = True
except ImportError as e:
    print(f"❌ CuDF non disponibile: {e}")
    CUDF_AVAILABLE = False
except Exception as e:
    print(f"⚠️ CuDF error: {e}")
    CUDF_AVAILABLE = False
# Test 3: cuML machine learning
print("\n⚡ TEST 3: CuML MACHINE LEARNING")
try:
    import cuml
    # Imported here too: when `import cudf` fails in Test 2, the
    # `import cupy as cp` that follows it never runs, which would leave `cp`
    # undefined for the no-cuDF fallback branch below.
    import cupy as cp
    # NOTE(review): confirm that the installed cuML release actually ships
    # IsolationForest and LocalOutlierFactor; if either import fails, the
    # ImportError handler below reports the whole of cuML as unavailable.
    from cuml.ensemble import IsolationForest
    from cuml.neighbors import LocalOutlierFactor
    from cuml.preprocessing import StandardScaler
    from cuml.cluster import DBSCAN

    if CUDF_AVAILABLE:
        print("🔧 Preparazione dati ML...")
        # Reuse the DataFrame built by the previous test. Columns are selected
        # with brackets because `df.values` is the DataFrame `.values`
        # property (an array of every column), not the 'values' column.
        X = cp.stack(
            [df['values'].values, df['id'].values / 100000.0], axis=1
        )
        print(f" Dataset shape: {X.shape}")

        # Isolation Forest
        print("🔧 Test Isolation Forest GPU...")
        iso_forest = IsolationForest(n_estimators=100, contamination=0.05)
        iso_forest.fit(X)
        iso_pred = iso_forest.predict(X)
        iso_anomalies = cp.sum(iso_pred == -1)
        print(f"✅ Isolation Forest: {iso_anomalies} anomalie su {len(X):,} campioni")

        # LOF (on a reduced sample to limit memory use)
        print("🔧 Test LOF GPU...")
        sample_size = min(10000, len(X))
        X_sample = X[:sample_size]
        lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
        lof_pred = lof.fit_predict(X_sample)
        lof_anomalies = cp.sum(lof_pred == -1)
        print(f"✅ LOF: {lof_anomalies} anomalie su {len(X_sample):,} campioni")

        # StandardScaler
        print("🔧 Test StandardScaler GPU...")
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_sample)
        print(f"✅ StandardScaler: Mean={cp.mean(X_scaled):.6f}, Std={cp.std(X_scaled):.6f}")

        # DBSCAN
        print("🔧 Test DBSCAN GPU...")
        dbscan = DBSCAN(eps=0.5, min_samples=5)
        dbscan_labels = dbscan.fit_predict(X_scaled)
        unique_labels = cp.unique(dbscan_labels)
        print(f"✅ DBSCAN: {len(unique_labels)} cluster trovati")
    else:
        # Basic test without cuDF: random data generated on the host and
        # copied to the GPU
        print("🔧 Test CuML con dati numpy...")
        import numpy as np
        X_cpu = np.random.random((10000, 5)).astype(np.float32)
        X_gpu = cp.asarray(X_cpu)
        iso_forest = IsolationForest(n_estimators=50, contamination=0.1)
        iso_forest.fit(X_gpu)
        iso_pred = iso_forest.predict(X_gpu)
        anomalies = cp.sum(iso_pred == -1)
        print(f"✅ CuML base test: {anomalies} anomalie")

    # Memory after the ML tests, as reported by CuPy's default memory pool
    mempool = cp.get_default_memory_pool()
    used_mb = mempool.used_bytes() / 1024**2
    print(f" GPU Memory dopo ML: {used_mb:.1f}MB")
    CUML_AVAILABLE = True
except ImportError as e:
    print(f"❌ CuML non disponibile: {e}")
    CUML_AVAILABLE = False
except Exception as e:
    print(f"⚠️ CuML error: {e}")
    CUML_AVAILABLE = False
# Test 4: TensorFlow GPU
print("\n⚡ TEST 4: TENSORFLOW GPU")
try:
    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    print(f"✅ TensorFlow {tf.__version__}")
    print(f" GPU devices rilevati: {len(gpus)}")
    if gpus:
        # Enable memory growth on the first GPU; TensorFlow raises
        # RuntimeError here, which is reported as a warning only.
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
            print(f"✅ Memory growth configurato per {gpus[0]}")
        except RuntimeError as e:
            print(f"⚠️ Memory growth warning: {e}")

        # Computation test on the GPU
        print("🔧 Test computation TensorFlow GPU...")
        with tf.device('/GPU:0'):
            # Matrix operations
            a = tf.random.normal([2000, 2000])
            b = tf.random.normal([2000, 2000])
            start_time = time.time()
            c = tf.matmul(a, b)
            # Converting to a Python float requires the value itself, so the
            # computation has finished by the time the timer is stopped.
            result = float(tf.reduce_sum(c))
            gpu_time = time.time() - start_time
            print(f"✅ Matrix multiplication 2000x2000: {gpu_time:.3f}s")
            print(f" Result sum: {result:.2f}")

            # Neural network test
            print("🔧 Test Neural Network GPU...")
            model = tf.keras.Sequential([
                tf.keras.layers.Dense(64, activation='relu', input_shape=(100,)),
                tf.keras.layers.Dense(32, activation='relu'),
                tf.keras.layers.Dense(1, activation='sigmoid'),
            ])
            model.compile(optimizer='adam', loss='binary_crossentropy')

            # Random training data
            X_test = tf.random.normal([1000, 100])
            y_test = tf.random.uniform([1000, 1])
            start_time = time.time()
            model.fit(X_test, y_test, epochs=5, verbose=0)
            train_time = time.time() - start_time
            print(f"✅ Neural Network training: {train_time:.3f}s")
    else:
        print("❌ Nessuna GPU TensorFlow disponibile")

    # The summary labels this component "TensorFlow GPU": an importable
    # TensorFlow that sees no GPU must not be reported as available.
    TENSORFLOW_AVAILABLE = bool(gpus)
except ImportError as e:
    print(f"❌ TensorFlow non disponibile: {e}")
    TENSORFLOW_AVAILABLE = False
except Exception as e:
    print(f"⚠️ TensorFlow error: {e}")
    TENSORFLOW_AVAILABLE = False
# Test 5: memory management on the Tesla M60
print("\n⚡ TEST 5: MEMORY MANAGEMENT TESLA M60")
try:
    if CUDF_AVAILABLE:
        mempool = cp.get_default_memory_pool()
        # Ask the current device for its real figures instead of assuming a
        # fixed 8GB card and counting only CuPy pool allocations.
        free_bytes, total_bytes = cp.cuda.Device().mem_info
        used_bytes = total_bytes - free_bytes
        print("📊 Tesla M60 Memory Status:")
        print(f" Totale: {total_bytes/1024**3:.1f}GB")
        print(f" Utilizzata: {used_bytes/1024**2:.1f}MB ({used_bytes/total_bytes*100:.1f}%)")
        print(f" Libera: {free_bytes/1024**2:.1f}MB")

        # Allocation stress test: up to five float32 arrays of 500MB each
        print("🔧 Test memory allocation stress...")
        elements_per_block = 500 * 1024 * 1024 // 4
        test_arrays = []
        try:
            for i in range(5):
                # Appended directly, without binding a separate name, so the
                # list holds the only reference to each block.
                test_arrays.append(
                    cp.random.random((elements_per_block,), dtype=cp.float32)
                )
                current_used = mempool.used_bytes()
                print(f" Allocation {i+1}: {current_used/1024**2:.1f}MB used")
        except Exception as e:
            print(f" Memory limit raggiunto: {e}")
        finally:
            # Drop every reference first, then return the blocks to the device
            test_arrays.clear()
            mempool.free_all_blocks()
            print("✅ Memory cleanup completato")
    else:
        # Make the skip visible instead of printing only the section header
        print("⚠️ Test memoria saltato: CuPy non disponibile")
except Exception as e:
    print(f"⚠️ Memory test error: {e}")
# Final summary of the availability flags set by the tests above
print("\n" + "="*60)
print("🎉 RIEPILOGO TEST ALMALINUX + TESLA M60")
print("="*60)
components = {
    "CuDF + CuPy": CUDF_AVAILABLE,
    "CuML": CUML_AVAILABLE,
    "TensorFlow GPU": TENSORFLOW_AVAILABLE,
}
for component, available in components.items():
    status = "✅ DISPONIBILE" if available else "❌ NON DISPONIBILE"
    print(f" {component}: {status}")

# Recommendations
print("\n📋 RACCOMANDAZIONI:")
any_gpu_component = any(components.values())
if all(components.values()):
    print("✅ Configurazione OTTIMALE per 1M+ record!")
    print(" - Usa CuDF per data loading (1M+ record)")
    print(" - Usa CuML per ML algorithms")
    print(" - TensorFlow GPU per neural networks")
    print(" - Batch size consigliato: 100K record")
elif CUDF_AVAILABLE and CUML_AVAILABLE:
    print("✅ Configurazione BUONA per GPU processing")
    print(" - Usa CuDF + CuML per processing veloce")
    print(" - TensorFlow fallback per neural networks")
elif TENSORFLOW_AVAILABLE and not (CUDF_AVAILABLE or CUML_AVAILABLE):
    # Only claim "TensorFlow only" when that is actually the case
    print("⚠️ Configurazione LIMITATA")
    print(" - Solo TensorFlow GPU disponibile")
    print(" - Batch size consigliato: 500K record max")
elif any_gpu_component:
    # Mixed cases (e.g. CuDF without CuML): list what is really available
    # rather than reporting every GPU library as missing.
    available_names = ", ".join(
        name for name, ok in components.items() if ok
    )
    print("⚠️ Configurazione PARZIALE")
    print(f" - Disponibili: {available_names}")
else:
    print("❌ FALLBACK CPU ONLY")
    print(" - Tutte le librerie GPU mancanti")
    print(" - Performance limitata per 1M+ record")

# Announce readiness only when at least one GPU component passed its test
if any_gpu_component:
    print("\n🚀 Tesla M60 pronta per processing DDoS Detection!")
else:
    print("\n⚠️ Tesla M60 NON pronta: nessuna libreria GPU disponibile")