ids.alfacom.it/extracted_idf/fix_cuda_libraries.sh
marco370 0bfe3258b5 Saved progress at the end of the loop
Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 1c71ce6e-1a3e-4f53-bb5d-77cdd22b8ea3
2025-11-11 09:15:10 +00:00

227 lines
7.3 KiB
Bash

#!/bin/bash
# =========================================================================
# FIX CUDA LIBRARIES - ALMALINUX 9.6 + TESLA M60
# Addresses the "Cannot dlopen some GPU libraries" error.
# =========================================================================
set -e

echo "🔧 FIX CUDA LIBRARIES - ALMALINUX 9.6"
echo "======================================"

# 1. DIAGNOSTICS OF THE CURRENT STATE
# Prints the driver, the compiler and the CUDA/cuDNN libraries that are
# currently visible, before anything gets changed.
echo "📋 DIAGNOSTICA SISTEMA ATTUALE..."

echo "🎮 Driver NVIDIA:"
# Not guarded: under `set -e` a missing/failing nvidia-smi stops the script.
nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits

printf '\n🚀 CUDA Toolkit installato:\n'
if ! command -v nvcc > /dev/null 2>&1; then
  echo "❌ NVCC non trovato"
else
  nvcc --version
  echo "✅ NVCC trovato"
fi

printf '\n📚 Librerie CUDA presenti:\n'
echo "Controllo /usr/local/cuda..."
if ! ls -la /usr/local/ | grep cuda; then
  echo "❌ Nessuna directory CUDA in /usr/local/"
fi

printf '\nControllo /usr/lib64...\n'
for lib_name in libcuda libcudnn; do
  # ls is used only for display; its error output is hidden when no file matches.
  if ! ls /usr/lib64/"$lib_name"* 2>/dev/null; then
    echo "❌ $lib_name non trovato in /usr/lib64/"
  fi
done

printf '\nControllo /usr/local/cuda/lib64...\n'
if [ ! -d "/usr/local/cuda/lib64" ]; then
  echo "❌ Directory /usr/local/cuda/lib64 non trovata"
else
  for lib_name in libcuda libcudnn; do
    if ! ls /usr/local/cuda/lib64/"$lib_name"* 2>/dev/null; then
      echo "❌ $lib_name non trovato in CUDA lib64"
    fi
  done
fi
# 2. FULL CUDA TOOLKIT INSTALLATION
# Removes partial CUDA installs, then installs cuda-toolkit-12-2 and falls
# back to cuda-toolkit-12-1 when /usr/local/cuda-12.2 does not exist afterwards.
echo -e "\n🚀 INSTALLAZIONE CUDA TOOLKIT COMPLETO..."

# Driver-side packages that the toolkit installation must not pull in/replace.
DRIVER_EXCLUDES="nvidia-driver*,xorg-x11-drv-nvidia*,nvidia-modprobe,nvidia-persistenced,nvidia-settings"

# Remove partial installations (best effort: failures are deliberately ignored).
# NOTE(review): 'nvidia-*' is broad; confirm that --exclude really protects the
# driver packages on `dnf remove` for the dnf version in use.
sudo dnf remove -y 'cuda*' 'nvidia-*' --exclude='nvidia-driver*,xorg-x11-drv-nvidia*' 2>/dev/null || true

# Reinstall the full CUDA Toolkit compatible with driver 550.
echo "Installazione CUDA 12.2 (compatibile con driver 550.144.03)..."
# The failure is handled here on purpose: with a bare command `set -e` would
# abort the script and the 12.1 fallback below could never run.
if ! sudo dnf install -y cuda-toolkit-12-2 --exclude="$DRIVER_EXCLUDES"; then
  echo "⚠️ Installazione cuda-toolkit-12-2 fallita"
fi

# Verify the installation.
echo -e "\n✅ Verifica installazione CUDA..."
if [ -d "/usr/local/cuda-12.2" ]; then
  echo "✅ CUDA 12.2 installato in /usr/local/cuda-12.2"
  ls -la /usr/local/cuda-12.2/lib64/ | head -10
else
  echo "❌ CUDA 12.2 non trovato - provo versione alternativa"
  # A failure of the fallback is fatal (set -e): no toolkit could be installed.
  sudo dnf install -y cuda-toolkit-12-1 --exclude="$DRIVER_EXCLUDES"
fi
# 3. CUDA SYMLINK CONFIGURATION
# Points /usr/local/cuda at the newest installed toolkit (12.2 preferred,
# then 12.1) and records the choice in CUDA_VERSION for the later steps.
echo -e "\n🔗 Configurazione symlink CUDA..."

# Pick the version BEFORE touching the existing symlink, so that a failed
# detection does not destroy a working /usr/local/cuda.
CUDA_VERSION=""
for cuda_candidate in 12.2 12.1; do
  if [ -d "/usr/local/cuda-$cuda_candidate" ]; then
    CUDA_VERSION="$cuda_candidate"
    break
  fi
done

if [ -z "$CUDA_VERSION" ]; then
  echo "❌ Nessuna versione CUDA trovata"
  exit 1
fi

# A real directory cannot be replaced by `ln`; the link would be created
# inside it instead. Stop with an explicit message rather than guessing.
if [ -d /usr/local/cuda ] && [ ! -L /usr/local/cuda ]; then
  echo "❌ /usr/local/cuda è una directory reale, non un symlink: rimuoverla manualmente" >&2
  exit 1
fi

# -n: treat an existing symlink-to-directory as the link itself and replace it.
sudo ln -sfn "/usr/local/cuda-$CUDA_VERSION" /usr/local/cuda
echo "✅ Symlink creato: /usr/local/cuda -> /usr/local/cuda-$CUDA_VERSION"
# 4. ENVIRONMENT VARIABLE CONFIGURATION
# Writes /etc/profile.d/cuda.sh (CUDA_HOME, PATH, LD_LIBRARY_PATH, CUDA_PATH)
# and loads it into the current shell so the following steps see the toolkit.
echo -e "\n🌍 Configurazione variabili ambiente..."

# Quoted delimiter: the body is written literally, so the variables are
# expanded when the profile script is sourced, not while generating it.
# ${LD_LIBRARY_PATH:+:...} appends the previous value only when it is
# non-empty; a trailing ':' would add an empty entry, which the dynamic
# loader interprets as the current working directory.
sudo tee /etc/profile.d/cuda.sh <<'EOF'
export CUDA_HOME=/usr/local/cuda
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export CUDA_PATH=$CUDA_HOME
EOF

# Load immediately. Sourcing the generated file already exports all four
# variables; exporting them a second time would only duplicate the PATH and
# LD_LIBRARY_PATH entries.
source /etc/profile.d/cuda.sh
# 5. cuDNN DOWNLOAD AND INSTALLATION
# Uses /tmp/cudnn.tar.xz when it is already there (manual download),
# otherwise tries to download it. On success the headers and libraries are
# copied into /usr/local/cuda. Sets CUDNN_INSTALLED to true or false; a
# missing or broken archive is not fatal.
echo -e "\n📚 INSTALLAZIONE cuDNN..."

# cuDNN release to pair with the CUDA 12.x toolkit.
CUDNN_VERSION="9.0.0"
CUDNN_MAJOR="9"
# Fixed path: the manual-download instructions below refer to this location.
CUDNN_ARCHIVE="/tmp/cudnn.tar.xz"
# Direct cuDNN link (may require an NVIDIA registration).
CUDNN_URL="https://developer.download.nvidia.com/compute/cudnn/9.0.0/local_installers/cudnn-linux-x86_64-9.0.0.312_cuda12-archive.tar.xz"
CUDNN_INSTALLED=false

echo "Download cuDNN $CUDNN_VERSION per CUDA $CUDA_VERSION..."
cd /tmp

if [ -s "$CUDNN_ARCHIVE" ]; then
  # Keep a manually downloaded archive: `wget -O` on the final path would
  # truncate it before the download even starts.
  echo "✅ File cuDNN già presente in $CUDNN_ARCHIVE - salto il download"
else
  # Download to a scratch file and move it in place only on success, so a
  # failed/partial transfer never masquerades as a valid archive.
  cudnn_partial=$(mktemp /tmp/cudnn.tar.xz.XXXXXX)
  if wget -O "$cudnn_partial" "$CUDNN_URL"; then
    mv -f -- "$cudnn_partial" "$CUDNN_ARCHIVE"
  else
    rm -f -- "$cudnn_partial"
    echo "⚠️ Download automatico fallito"
    echo "📥 DOWNLOAD MANUALE NECESSARIO:"
    echo " 1. Vai su: https://developer.nvidia.com/cudnn"
    echo " 2. Registrati/Login"
    echo " 3. Scarica: cuDNN v${CUDNN_MAJOR}.x per CUDA $CUDA_VERSION"
    echo " 4. Salva come: $CUDNN_ARCHIVE"
    echo " 5. Rilanciare questo script"
  fi
fi

if [ -s "$CUDNN_ARCHIVE" ]; then
  echo "✅ File cuDNN trovato - installazione..."

  # Extract into a private directory so that leftovers of previous runs in
  # /tmp cannot be mistaken for the freshly extracted tree.
  cudnn_extract_dir=$(mktemp -d /tmp/cudnn-extract.XXXXXX)
  CUDNN_DIR=""
  if tar -xf "$CUDNN_ARCHIVE" -C "$cudnn_extract_dir"; then
    CUDNN_DIR=$(find "$cudnn_extract_dir" -mindepth 1 -maxdepth 1 -type d -name 'cudnn*' | head -1)
  fi

  if [ -n "$CUDNN_DIR" ] && [ -d "$CUDNN_DIR/include" ] && [ -d "$CUDNN_DIR/lib" ]; then
    echo "✅ cuDNN estratto in: $CUDNN_DIR"
    # Copy headers.
    sudo cp "$CUDNN_DIR"/include/cudnn*.h /usr/local/cuda/include/
    # Copy libraries; -P keeps the .so symlinks as symlinks.
    sudo cp -P "$CUDNN_DIR"/lib/libcudnn* /usr/local/cuda/lib64/
    # Make them readable by every user.
    sudo chmod a+r /usr/local/cuda/include/cudnn*.h
    sudo chmod a+r /usr/local/cuda/lib64/libcudnn*
    echo "✅ cuDNN installato"
    CUDNN_INSTALLED=true
  else
    echo "❌ Errore estrazione cuDNN"
  fi

  rm -rf -- "$cudnn_extract_dir"
else
  echo "❌ File cuDNN non trovato - proseguo senza cuDNN"
fi
# 6. LDCONFIG UPDATE
# Registers the CUDA library directory in ld.so.conf.d and runs ldconfig.
printf '\n🔄 Aggiornamento ldconfig...\n'
# tee also echoes the written line to stdout.
sudo tee /etc/ld.so.conf.d/cuda.conf <<< "/usr/local/cuda/lib64"
sudo ldconfig
# 7. TENSORFLOW GPU REINSTALLATION
# Removes any existing TensorFlow packages and installs the release line
# chosen for the detected CUDA_VERSION (2.16.* for 12.2, 2.15.* otherwise).
echo -e "\n🤖 Reinstallazione TensorFlow GPU..."

# Best effort: the packages may simply not be installed.
pip3 uninstall -y tensorflow tensorflow-intel tensorflow-io-gcs-filesystem 2>/dev/null || true

# Install a compatible version. The specifier is kept in single quotes
# because '[and-cuda]' and '*' are glob characters: left unquoted, the shell
# could expand them against files in the current directory.
if [ "$CUDA_VERSION" = "12.2" ]; then
  tf_requirement='tensorflow[and-cuda]==2.16.*'
else
  tf_requirement='tensorflow[and-cuda]==2.15.*'
fi
pip3 install "$tf_requirement"
# 8. FINAL TEST
# Shows the resulting environment and libraries, then runs nvcc, nvidia-smi
# and a small TensorFlow GPU computation. Under `set -e` a failing nvcc,
# nvidia-smi or python3 call stops the script here.
echo -e "\n🧪 TEST CONFIGURAZIONE FINALE..."
echo "📋 Verifica librerie:"
echo "CUDA_HOME: $CUDA_HOME"
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

echo -e "\nLibrerie presenti:"
ls -la /usr/local/cuda/lib64/libcuda* 2>/dev/null || echo "❌ libcuda non trovato"
ls -la /usr/local/cuda/lib64/libcudnn* 2>/dev/null || echo "❌ libcudnn non trovato"

echo -e "\nTest nvcc:"
nvcc --version

echo -e "\nTest nvidia-smi:"
nvidia-smi

echo -e "\n🎯 TEST TENSORFLOW GPU:"
# The Python snippet must keep its block indentation: without it the
# interpreter rejects the code with an IndentationError. The text contains
# no '$', backtick or backslash, so double quotes pass it through unchanged.
python3 -c "
import tensorflow as tf

print('TensorFlow version:', tf.__version__)
print('CUDA built:', tf.test.is_built_with_cuda())

gpus = tf.config.list_physical_devices('GPU')
print('GPU devices:', len(gpus))

if gpus:
    print('GPU found:', gpus[0])
    try:
        with tf.device('/GPU:0'):
            a = tf.constant([1.0, 2.0])
            b = tf.constant([3.0, 4.0])
            c = tf.add(a, b)
            print('✅ GPU test successful:', c.numpy())
    except Exception as e:
        print('❌ GPU test failed:', e)
else:
    print('❌ No GPU devices found')
"
# Final summary: reports the toolkit as installed, then the cuDNN outcome
# according to CUDNN_INSTALLED, followed by the suggested next commands.
printf '\n✅ CONFIGURAZIONE COMPLETATA!\n'
echo "============================="

# The toolkit line is printed in both outcomes.
echo "✅ CUDA Toolkit: Installato"
case "$CUDNN_INSTALLED" in
  true)
    echo "✅ cuDNN: Installato"
    echo "🚀 Sistema pronto per DDoS Detection v04 GPU"
    ;;
  *)
    echo "⚠️ cuDNN: Installazione manuale necessaria"
    echo "🔄 Rilanciare script dopo download cuDNN"
    ;;
esac

# Quoted delimiter: the text is emitted literally, starting with a blank line.
cat <<'EOF'

🎯 COMANDI PROSSIMI:
source /etc/profile.d/cuda.sh
python3 analisys_04.py --max-records 1000000 --memory-optimize
python3 detect_multi_04.py --advanced --batch-size 2000
EOF