ids.alfacom.it/extracted_idf/diagnose_tesla_m60.sh
marco370 0bfe3258b5 Saved progress at the end of the loop
Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 1c71ce6e-1a3e-4f53-bb5d-77cdd22b8ea3
2025-11-11 09:15:10 +00:00

192 lines
6.2 KiB
Bash

#!/bin/bash
# =========================================================================
# FULL TESLA M60 + TENSORFLOW DIAGNOSIS
# Pinpoints the exact problem and the matching solution
# =========================================================================
# errexit: stop at the first command that fails without being handled.
set -o errexit
# Title banner.
printf '%s\n' \
  "🔍 DIAGNOSI COMPLETA TESLA M60 + TENSORFLOW" \
  "==========================================="
# 1. NVIDIA DRIVER CHECK
# Each probe reports its own failure and lets the script go on: this is a
# diagnostic tool, and under `set -e` an unguarded failing nvidia-smi would
# abort the run before any of the later sections are printed.
echo "🎮 VERIFICA DRIVER NVIDIA..."
nvidia-smi --query-gpu=name,driver_version,compute_cap,memory.total --format=csv,noheader \
  || echo "❌ nvidia-smi non disponibile o in errore"
echo -e "\n🔧 Verifica librerie driver..."
ls -la /usr/lib64/libcuda* || echo "❌ libcuda non trovato"
# The status of `ls ... | head` is the status of head, so an `|| echo` placed
# after the pipeline can never fire. Test for matches first, then list them.
if compgen -G '/usr/lib64/libnvidia*' >/dev/null; then
  ls -la /usr/lib64/libnvidia* | head -5
else
  echo "❌ libnvidia non trovato"
fi
# 2. CUDA INSTALLATION CHECK
# For each toolkit version, list the libcuda* files found under its lib64
# directory, or print a "not found" line when ls fails.
echo -e "\n📦 VERIFICA INSTALLAZIONI CUDA..."
for cuda_release in 12.2 11.8; do
  echo "CUDA ${cuda_release}:"
  if ! ls -la "/usr/local/cuda-${cuda_release}/lib64/"libcuda* 2>/dev/null; then
    echo "❌ CUDA ${cuda_release} libcuda non trovato"
  fi
done
# 3. cuDNN CHECK
# Shows up to three cuDNN libraries from the system directory and from the
# CUDA 11.8 tree. A missing library is reported with an explicit message,
# like the driver and CUDA sections do, instead of only a raw ls error.
echo -e "\n📚 VERIFICA cuDNN..."
echo "Sistema cuDNN:"
if compgen -G '/usr/lib64/libcudnn*' >/dev/null; then
  ls -la /usr/lib64/libcudnn* | head -3
else
  echo "❌ cuDNN di sistema non trovato"
fi
echo "CUDA 11.8 cuDNN:"
if compgen -G '/usr/local/cuda-11.8/lib64/libcudnn*' >/dev/null; then
  ls -la /usr/local/cuda-11.8/lib64/libcudnn* | head -3
else
  echo "❌ CUDA 11.8 cuDNN non trovato"
fi
# 4. DETAILED TENSORFLOW TEST
echo -e "\n🧪 TEST TENSORFLOW DETTAGLIATO..."
# Most verbose TensorFlow native logging, first GPU only, CUDA 11.8 toolchain.
# These exports are inherited by every python3 started below.
export TF_CPP_MIN_LOG_LEVEL=0
export CUDA_VISIBLE_DEVICES=0
export CUDA_HOME=/usr/local/cuda-11.8
# Append the previous value only when it is non-empty: a trailing ':' would
# add the current directory to the library search path.
export LD_LIBRARY_PATH="/usr/local/cuda-11.8/lib64:/usr/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# The program is fed through a quoted here-document, so the shell expands
# nothing inside it. A missing or crashing python3 is reported and the
# diagnosis continues instead of being stopped by `set -e`.
if ! python3 - <<'PY'
import os
import sys

print('=== TENSORFLOW DEBUG DETTAGLIATO ===')
print('Python version:', sys.version)
print('CUDA_HOME:', os.environ.get('CUDA_HOME'))
print('LD_LIBRARY_PATH:', os.environ.get('LD_LIBRARY_PATH'))
try:
    import tensorflow as tf
    print('\n✅ TensorFlow importato:', tf.__version__)
    # Build info: show only the CUDA/GPU related entries.
    try:
        build_info = tf.sysconfig.get_build_info()
        print('\n📋 TensorFlow Build Info:')
        for key, value in build_info.items():
            if 'cuda' in key.lower() or 'gpu' in key.lower():
                print(f' {key}: {value}')
    except Exception as e:
        print('⚠️ Build info error:', e)
    # CUDA availability probes. Each one is guarded on its own so that a
    # probe missing from the installed TensorFlow does not skip the rest.
    print('\n🔍 CUDA Tests:')
    for probe in ('is_built_with_cuda', 'is_gpu_available', 'is_built_with_gpu_support'):
        try:
            print(f'{probe}():', getattr(tf.test, probe)())
        except Exception as e:
            print(f'{probe}(): error: {e}')
    # Every physical device TensorFlow can see.
    physical_devices = tf.config.list_physical_devices()
    print('\n📱 All Physical Devices:')
    for device in physical_devices:
        print(f' {device}')
    # GPU devices only, with their details when available.
    gpu_devices = tf.config.list_physical_devices('GPU')
    print(f'\n🎮 GPU Devices: {len(gpu_devices)}')
    if gpu_devices:
        for i, gpu in enumerate(gpu_devices):
            print(f' GPU {i}: {gpu}')
            try:
                details = tf.config.experimental.get_device_details(gpu)
                print(f' Details: {details}')
            except Exception as e:
                print(f' Details error: {e}')
except ImportError as e:
    print('❌ TensorFlow import failed:', e)
except Exception as e:
    print('❌ TensorFlow error:', e)
    import traceback
    traceback.print_exc()
PY
then
  echo "❌ Test TensorFlow non eseguito (python3 mancante o in errore)"
fi
# 5. LIBRARY DLOPEN TEST
echo -e "\n🔗 TEST DLOPEN LIBRERIE..."
# LD_LIBRARY_PATH is placed in python3's environment on the command line:
# the dynamic loader reads it when the process starts, so assigning
# os.environ['LD_LIBRARY_PATH'] inside the running interpreter would not
# change how the libraries below (or their dependencies) are resolved.
# A missing or crashing python3 is reported without stopping the script.
if ! LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:/usr/lib64 python3 - <<'PY'
import ctypes

print('=== TEST DLOPEN LIBRERIE ===')
# Critical libraries: the driver library plus the CUDA 11.8 runtime pieces.
libraries = [
    '/usr/lib64/libcuda.so.1',
    '/usr/local/cuda-11.8/lib64/libcudart.so',
    '/usr/local/cuda-11.8/lib64/libcublas.so',
    '/usr/local/cuda-11.8/lib64/libcufft.so',
    '/usr/local/cuda-11.8/lib64/libcudnn.so',
]
for lib in libraries:
    try:
        # Loading is the whole test; the returned handle is not needed.
        ctypes.CDLL(lib)
        print(f'✅ {lib}: OK')
    except OSError as e:
        print(f'❌ {lib}: {e}')
PY
then
  echo "❌ Test dlopen non eseguito (python3 mancante o in errore)"
fi
# 6. VERSION COMPATIBILITY CHECK
# Every value falls back to a placeholder when its tool is missing or fails,
# so that `set -e` cannot end the script before the solution and
# recommendation sections are printed.
echo -e "\n🔄 VERIFICA COMPATIBILITÀ VERSIONI..."
echo "Compute Capability Tesla M60:"
nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits || echo "N/A"
echo -e "\nVersioni installate:"
echo "Driver NVIDIA: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits 2>/dev/null || echo 'N/A')"
echo "CUDA Runtime: $(nvcc --version 2>/dev/null | grep 'release' || echo 'N/A')"
# The import sits inside the try block: a missing TensorFlow is reported as
# "not available" rather than exiting python3 with an error.
if ! python3 - <<'PY'
try:
    import tensorflow as tf
    info = tf.sysconfig.get_build_info()
    print(f'TensorFlow CUDA: {info.get("cuda_version", "N/A")}')
    print(f'TensorFlow cuDNN: {info.get("cudnn_version", "N/A")}')
except Exception:
    print('TensorFlow build info non disponibile')
PY
then
  # python3 itself is missing or crashed before it could print anything.
  echo "TensorFlow build info non disponibile"
fi
# 7. POSSIBLE SOLUTIONS
# Static text: the four candidate fixes, one output line per argument.
# The leading empty argument produces the blank line before the heading.
printf '%s\n' \
  "" \
  "💡 POSSIBILI SOLUZIONI..." \
  "Basandoci sui risultati della diagnosi:" \
  "" \
  "OPZIONE 1: TensorFlow più vecchio (2.10.x)" \
  " • pip3 install tensorflow==2.10.1" \
  " • Supporto garantito Tesla M60 CC 5.2" \
  "" \
  "OPZIONE 2: TensorFlow con conda (librerie embedded)" \
  " • conda install tensorflow-gpu=2.12" \
  " • Include librerie CUDA ottimizzate" \
  "" \
  "OPZIONE 3: CPU-only con ottimizzazioni" \
  " • Uso esclusivo CPU con parallelizzazione" \
  " • Performance comunque accettabili per DDoS Detection" \
  "" \
  "OPZIONE 4: Build TensorFlow custom" \
  " • Compilazione specifica per Tesla M60" \
  " • Tempo richiesto: 2-3 ore"
# 8. FINAL RECOMMENDATION
echo -e "\n🎯 RACCOMANDAZIONE..."
# Ask TensorFlow how many GPUs it can see. The probe is best-effort: stderr
# is discarded on purpose (TensorFlow logs there) and any failure counts as
# "no GPU".
gpu_probe_output=$(python3 -c "import tensorflow as tf; print(len(tf.config.list_physical_devices('GPU')))" 2>/dev/null) || gpu_probe_output=""
# Keep only the last output line and accept it only if it is a plain
# non-negative integer; otherwise stray stdout text would be mistaken for
# "GPU detected".
GPU_COUNT=${gpu_probe_output##*$'\n'}
[[ "$GPU_COUNT" =~ ^[0-9]+$ ]] || GPU_COUNT=0
if [ "$GPU_COUNT" -eq 0 ]; then
  echo "❌ GPU non rilevata - RACCOMANDAZIONE:"
  echo ""
  echo "🚀 SOLUZIONE IMMEDIATA: TensorFlow 2.10.1"
  echo " pip3 uninstall tensorflow"
  echo " pip3 install tensorflow==2.10.1"
  echo " # TF 2.10.1 ha miglior supporto hardware legacy"
  echo ""
  echo "📊 ALTERNATIVE:"
  echo " • CPU-only: Performance 75K record/sec (accettabile)"
  echo " • Upgrade hardware: GPU moderna (RTX/Tesla P100+)"
  echo " • Cloud GPU: AWS/GCP Tesla V100/A100"
else
  echo "✅ GPU rilevata - sistema funzionante!"
fi
echo -e "\n🔚 DIAGNOSI COMPLETATA"