Replit-Commit-Author: Agent Replit-Commit-Session-Id: 7a657272-55ba-4a79-9a2e-f1ed9bc7a528 Replit-Commit-Checkpoint-Type: full_checkpoint Replit-Commit-Event-Id: 1c71ce6e-1a3e-4f53-bb5d-77cdd22b8ea3
227 lines
7.3 KiB
Bash
227 lines
7.3 KiB
Bash
#!/bin/bash
# -------------------------------------------------------------------------
# CUDA library repair - AlmaLinux 9.6 + Tesla M60
# Resolves the "Cannot dlopen some GPU libraries" error.
# -------------------------------------------------------------------------

# Stop at the first command that exits with a non-zero status.
set -e

# Title banner.
printf '%s\n' "🔧 FIX CUDA LIBRARIES - ALMALINUX 9.6"
printf '%s\n' "======================================"

# 1. CURRENT STATE DIAGNOSTICS
# Read-only survey: reports the driver, the toolkit compiler and the
# CUDA/cuDNN shared libraries found on disk. Nothing is modified here.
printf '%s\n' "📋 DIAGNOSTICA SISTEMA ATTUALE..."

printf '%s\n' "🎮 Driver NVIDIA:"
nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits

printf '\n%s\n' "🚀 CUDA Toolkit installato:"
if ! command -v nvcc >/dev/null 2>&1; then
  printf '%s\n' "❌ NVCC non trovato"
else
  nvcc --version
  printf '%s\n' "✅ NVCC trovato"
fi

printf '\n%s\n' "📚 Librerie CUDA presenti:"
printf '%s\n' "Controllo /usr/local/cuda..."
# Listing is for display only; grep just narrows it to CUDA entries.
ls -la /usr/local/ | grep cuda || printf '%s\n' "❌ Nessuna directory CUDA in /usr/local/"

printf '\n%s\n' "Controllo /usr/lib64..."
for lib_name in libcuda libcudnn; do
  # An unmatched glob reaches ls literally, ls fails, and the message prints.
  ls /usr/lib64/"$lib_name"* 2>/dev/null \
    || printf '%s\n' "❌ $lib_name non trovato in /usr/lib64/"
done

printf '\n%s\n' "Controllo /usr/local/cuda/lib64..."
if [[ ! -d /usr/local/cuda/lib64 ]]; then
  printf '%s\n' "❌ Directory /usr/local/cuda/lib64 non trovata"
else
  for lib_name in libcuda libcudnn; do
    ls /usr/local/cuda/lib64/"$lib_name"* 2>/dev/null \
      || printf '%s\n' "❌ $lib_name non trovato in CUDA lib64"
  done
fi

# 2. FULL CUDA TOOLKIT INSTALLATION
# Removes partial CUDA installs, then installs cuda-toolkit-12-2 and falls
# back to cuda-toolkit-12-1 when /usr/local/cuda-12.2 does not appear.
echo -e "\n🚀 INSTALLAZIONE CUDA TOOLKIT COMPLETO..."

# Driver-side packages this script must neither remove nor pull in. One list
# is shared by the remove and both installs so the cleanup cannot drop a
# package that the subsequent install is forbidden from restoring.
DRIVER_EXCLUDES="nvidia-driver*,xorg-x11-drv-nvidia*,nvidia-modprobe,nvidia-persistenced,nvidia-settings"

# Remove partial installations. Best-effort on purpose: having nothing to
# remove is not an error, hence the silenced stderr and `|| true`.
# NOTE(review): 'nvidia-*' is a broad pattern; confirm on the target host
# that no driver dependency outside DRIVER_EXCLUDES matches it.
sudo dnf remove -y 'cuda*' 'nvidia-*' --exclude="$DRIVER_EXCLUDES" 2>/dev/null || true

# Install the toolkit intended for the 550 driver series.
echo "Installazione CUDA 12.2 (compatibile con driver 550.144.03)..."
# Under `set -e` an unguarded failure here would abort the script before the
# fallback below could run, so the failure is reported and execution goes on.
if ! sudo dnf install -y cuda-toolkit-12-2 --exclude="$DRIVER_EXCLUDES"; then
  echo "⚠️ Installazione cuda-toolkit-12-2 fallita"
fi

# Verify the result on disk and fall back to 12.1 if 12.2 is missing.
echo -e "\n✅ Verifica installazione CUDA..."
if [ -d "/usr/local/cuda-12.2" ]; then
  echo "✅ CUDA 12.2 installato in /usr/local/cuda-12.2"
  ls -la /usr/local/cuda-12.2/lib64/ | head -10
else
  echo "❌ CUDA 12.2 non trovato - provo versione alternativa"
  # A failure of the fallback is fatal (set -e): no toolkit is available.
  sudo dnf install -y cuda-toolkit-12-1 --exclude="$DRIVER_EXCLUDES"
fi

# 3. CUDA SYMLINK CONFIGURATION
# Points /usr/local/cuda at the newest installed toolkit (12.2, then 12.1)
# and records the choice in CUDA_VERSION for the later sections.
echo -e "\n🔗 Configurazione symlink CUDA..."

# Pick the version BEFORE touching the existing link, so that the
# "nothing installed" path leaves the current /usr/local/cuda untouched.
CUDA_VERSION=""
for candidate_version in 12.2 12.1; do
  if [ -d "/usr/local/cuda-$candidate_version" ]; then
    CUDA_VERSION="$candidate_version"
    break
  fi
done

if [ -z "$CUDA_VERSION" ]; then
  echo "❌ Nessuna versione CUDA trovata"
  exit 1
fi

# -T treats /usr/local/cuda as the link itself: an existing symlink is
# replaced in place, and a real directory at that path is an error instead
# of silently receiving a nested link.
sudo ln -sfT "/usr/local/cuda-$CUDA_VERSION" /usr/local/cuda

echo "✅ Symlink creato: /usr/local/cuda -> /usr/local/cuda-$CUDA_VERSION"

# 4. ENVIRONMENT VARIABLES
# Writes /etc/profile.d/cuda.sh (CUDA_HOME, PATH, LD_LIBRARY_PATH, CUDA_PATH)
# and loads it into the current shell.
echo -e "\n🌍 Configurazione variabili ambiente..."

# Quoted delimiter: the body is written verbatim and expanded at login time.
# ${LD_LIBRARY_PATH:+:...} appends the previous value only when it is
# non-empty; a bare trailing ':' would add an empty entry, which the dynamic
# loader interprets as the current working directory.
sudo tee /etc/profile.d/cuda.sh <<'EOF'
export CUDA_HOME=/usr/local/cuda
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export CUDA_PATH=$CUDA_HOME
EOF

# Load it immediately. The file exports all four variables, so repeating the
# exports by hand would only prepend the CUDA directories a second time.
source /etc/profile.d/cuda.sh

# 5. cuDNN DOWNLOAD AND INSTALLATION
# Obtains the cuDNN archive (reusing /tmp/cudnn.tar.xz when already present),
# unpacks it into a private directory and copies headers and libraries into
# /usr/local/cuda. Sets CUDNN_INSTALLED to true or false for the summary.
echo -e "\n📚 INSTALLAZIONE cuDNN..."

# cuDNN release to fetch (CUDA 12 build, see CUDNN_URL).
CUDNN_VERSION="9.0.0"
CUDNN_MAJOR="9"
CUDNN_ARCHIVE="/tmp/cudnn.tar.xz"
CUDNN_INSTALLED=false

echo "Download cuDNN $CUDNN_VERSION per CUDA $CUDA_VERSION..."
cd /tmp

# Direct cuDNN link (may require an NVIDIA account).
CUDNN_URL="https://developer.download.nvidia.com/compute/cudnn/9.0.0/local_installers/cudnn-linux-x86_64-9.0.0.312_cuda12-archive.tar.xz"

if [ -s "$CUDNN_ARCHIVE" ]; then
  # Never overwrite an archive that is already there: this is how the manual
  # download requested below gets picked up on the next run.
  echo "✅ Archivio cuDNN già presente in $CUDNN_ARCHIVE - salto il download"
else
  # Download to a scratch file first: `wget -O` creates/truncates its target
  # even when the transfer fails, which would leave an empty archive behind.
  cudnn_partial=$(mktemp /tmp/cudnn-download.XXXXXX)
  if wget -O "$cudnn_partial" "$CUDNN_URL"; then
    mv -f -- "$cudnn_partial" "$CUDNN_ARCHIVE"
  else
    rm -f -- "$cudnn_partial" "$CUDNN_ARCHIVE"
    echo "⚠️ Download automatico fallito"
    echo "📥 DOWNLOAD MANUALE NECESSARIO:"
    echo " 1. Vai su: https://developer.nvidia.com/cudnn"
    echo " 2. Registrati/Login"
    echo " 3. Scarica: cuDNN v9.x per CUDA $CUDA_VERSION"
    echo " 4. Salva come: /tmp/cudnn.tar.xz"
    echo " 5. Rilanciare questo script"
    echo "❌ File cuDNN non trovato - proseguo senza cuDNN"
  fi
fi

if [ -s "$CUDNN_ARCHIVE" ]; then
  echo "✅ File cuDNN trovato - installazione..."

  # Unpack into a fresh directory so a leftover /tmp/cudnn* directory from an
  # earlier run can never be mistaken for this extraction.
  cudnn_extract_dir=$(mktemp -d /tmp/cudnn-extract.XXXXXX)
  CUDNN_DIR=""
  if tar -xf "$CUDNN_ARCHIVE" -C "$cudnn_extract_dir"; then
    # Locate the single top-level directory contained in the archive.
    CUDNN_DIR=$(find "$cudnn_extract_dir" -mindepth 1 -maxdepth 1 -type d -name 'cudnn*' -print -quit)
  fi

  if [ -n "$CUDNN_DIR" ]; then
    echo "✅ cuDNN estratto in: $CUDNN_DIR"

    # Copy headers.
    sudo cp -- "$CUDNN_DIR"/include/cudnn*.h /usr/local/cuda/include/

    # Copy libraries; -P keeps the .so version symlinks as symlinks.
    sudo cp -P -- "$CUDNN_DIR"/lib/libcudnn* /usr/local/cuda/lib64/

    # Make everything world-readable.
    sudo chmod a+r /usr/local/cuda/include/cudnn*.h
    sudo chmod a+r /usr/local/cuda/lib64/libcudnn*

    echo "✅ cuDNN installato"
    CUDNN_INSTALLED=true
  else
    # Corrupt/unreadable archive or unexpected layout: continue without cuDNN.
    echo "❌ Errore estrazione cuDNN"
  fi

  # ${var:?} guards against ever running rm -rf on an empty path.
  rm -rf -- "${cudnn_extract_dir:?}"
fi

# 6. LDCONFIG REFRESH
# Registers the CUDA library directory with the dynamic linker and rebuilds
# its cache.
printf '\n%s\n' "🔄 Aggiornamento ldconfig..."
printf '%s\n' "/usr/local/cuda/lib64" | sudo tee /etc/ld.so.conf.d/cuda.conf
sudo ldconfig

# 7. TENSORFLOW GPU REINSTALL
# Removes any existing TensorFlow packages and installs the release line
# chosen from CUDA_VERSION (2.16.x for 12.2, otherwise 2.15.x).
echo -e "\n🤖 Reinstallazione TensorFlow GPU..."

# Best-effort: the packages may simply not be installed.
pip3 uninstall -y tensorflow tensorflow-intel tensorflow-io-gcs-filesystem 2>/dev/null || true

# Select the release line for the detected toolkit.
# NOTE(review): the CUDA-to-TensorFlow mapping is kept as found; verify it,
# and the cuDNN 9 installed earlier, against TensorFlow's tested build
# configurations.
case "${CUDA_VERSION:-}" in
  12.2) tf_requirement='tensorflow[and-cuda]==2.16.*' ;;
  *)    tf_requirement='tensorflow[and-cuda]==2.15.*' ;;
esac

# The requirement contains '[', ']' and '*'. It must reach pip as one quoted
# word, otherwise the shell treats it as a glob pattern.
pip3 install "$tf_requirement"

# 8. FINAL TEST
# Prints the resulting environment, lists the installed libraries, runs
# nvcc and nvidia-smi, then asks TensorFlow to execute one op on the GPU.
printf '\n%s\n' "🧪 TEST CONFIGURAZIONE FINALE..."

printf '%s\n' "📋 Verifica librerie:"
printf '%s\n' "CUDA_HOME: $CUDA_HOME"
printf '%s\n' "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

printf '\n%s\n' "Librerie presenti:"
for lib_name in libcuda libcudnn; do
  ls -la /usr/local/cuda/lib64/"$lib_name"* 2>/dev/null \
    || printf '%s\n' "❌ $lib_name non trovato"
done

printf '\n%s\n' "Test nvcc:"
nvcc --version

printf '\n%s\n' "Test nvidia-smi:"
nvidia-smi

printf '\n%s\n' "🎯 TEST TENSORFLOW GPU:"
# The probe is kept in a quoted here-document so the Python source needs no
# shell escaping; it is still handed to the interpreter through -c.
tf_gpu_probe=$(cat <<'PY'
import tensorflow as tf
print('TensorFlow version:', tf.__version__)
print('CUDA built:', tf.test.is_built_with_cuda())
gpus = tf.config.list_physical_devices('GPU')
print('GPU devices:', len(gpus))
if gpus:
    print('GPU found:', gpus[0])
    try:
        with tf.device('/GPU:0'):
            a = tf.constant([1.0, 2.0])
            b = tf.constant([3.0, 4.0])
            c = tf.add(a, b)
        print('✅ GPU test successful:', c.numpy())
    except Exception as e:
        print('❌ GPU test failed:', e)
else:
    print('❌ No GPU devices found')
PY
)
python3 -c "$tf_gpu_probe"

# Closing summary: reports what was installed, based on CUDNN_INSTALLED, and
# prints the commands to run next.
printf '\n%s\n' "✅ CONFIGURAZIONE COMPLETATA!"
printf '%s\n' "============================="

# The toolkit line is printed in both outcomes; only the cuDNN part differs.
printf '%s\n' "✅ CUDA Toolkit: Installato"
if [[ "$CUDNN_INSTALLED" != true ]]; then
  printf '%s\n' "⚠️ cuDNN: Installazione manuale necessaria"
  printf '%s\n' "🔄 Rilanciare script dopo download cuDNN"
else
  printf '%s\n' "✅ cuDNN: Installato"
  printf '%s\n' "🚀 Sistema pronto per DDoS Detection v04 GPU"
fi

printf '\n%s\n' "🎯 COMANDI PROSSIMI:"
# Quoted delimiter: the lines are printed verbatim, not executed or expanded.
cat <<'EOF'
source /etc/profile.d/cuda.sh
python3 analisys_04.py --max-records 1000000 --memory-optimize
python3 detect_multi_04.py --advanced --batch-size 2000
EOF