#!/bin/bash
# =========================================================================
# FINAL TENSORFLOW FIX - RESOLVES NVIDIA LIBRARY CONFLICTS
# Tesla M60 + AlmaLinux 9.6 + TensorFlow dependency conflict
#
# What this script does, in order:
#   1. Imports TensorFlow once and greps the log for missing-library errors.
#   2. Uninstalls the pip-provided nvidia-*-cu12 wheels.
#   3. Checks the CUDA libraries under TARGET_DIR and symlinks missing ones
#      from SYSTEM_LIB_DIR.
#   4. Installs the CUDA 12 runtime libraries through dnf.
#   5. Refreshes the dynamic linker cache.
#   6. Reinstalls TensorFlow with --no-deps plus its non-CUDA dependencies.
#   7. Runs a verbose GPU smoke test and prints a final verdict.
#
# Requirements: bash, sudo, dnf, pip3, python3.
# =========================================================================

set -euo pipefail

# Directory where the symlinks to the CUDA libraries are created.
readonly TARGET_DIR="/usr/local/cuda/lib64"
# Directory searched for the libraries after the dnf packages are installed.
readonly SYSTEM_LIB_DIR="/usr/lib64"
# TensorFlow release that gets (re)installed.
readonly TF_VERSION="2.16.1"

# Verbose TensorFlow logging and a single visible GPU for every child process.
export TF_CPP_MIN_LOG_LEVEL=0
export CUDA_VISIBLE_DEVICES=0

#######################################
# Make sure one versioned CUDA library is reachable from TARGET_DIR.
# If it is missing, try to install the matching dnf package and, when the
# library then exists in SYSTEM_LIB_DIR, symlink both the versioned and the
# unversioned name into TARGET_DIR.
# Globals:   TARGET_DIR, SYSTEM_LIB_DIR (read)
# Arguments: $1 - library name without the "lib" prefix (e.g. cublas)
#            $2 - soname major version (e.g. 12)
# Outputs:   progress messages on stdout
# Returns:   0, unless a symlink cannot be created (aborts under set -e)
#######################################
ensure_cuda_lib() {
  local name=$1
  local soversion=$2
  local versioned="lib${name}.so.${soversion}"
  local system_lib="${SYSTEM_LIB_DIR}/${versioned}"

  # Nothing to do when the library is already in place.
  if [[ -f "${TARGET_DIR}/${versioned}" ]]; then
    return 0
  fi

  echo "🔧 Installazione lib${name}..."
  # The glob is quoted so that dnf, not the shell, expands it.
  # Best effort: a failed install must not abort the remaining steps.
  sudo dnf install -y "lib${name}-12-*" || true

  # Look in SYSTEM_LIB_DIR.
  if [[ -f "${system_lib}" ]]; then
    sudo ln -sf "${system_lib}" "${TARGET_DIR}/${versioned}"
    sudo ln -sf "${system_lib}" "${TARGET_DIR}/lib${name}.so"
    echo "✅ lib${name} symlink creato"
  fi
}

#######################################
# Step 1: import TensorFlow with verbose logging and show only the log lines
# that point at libraries which could not be loaded.
#######################################
diagnose_missing_libraries() {
  echo "🔍 DIAGNOSI DETTAGLIATA LIBRERIE MANCANTI..."

  # Capture the log first so that a failing import cannot be confused with
  # "grep found nothing" now that pipefail is enabled.
  local import_log
  import_log=$(python3 -c "
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
import tensorflow as tf
print('Test librerie...')
" 2>&1) || true

  grep -E "(Cannot dlopen|No such file|not found)" <<<"${import_log}" \
    || echo "Nessun errore specifico trovato"
}

#######################################
# Step 2: remove the NVIDIA wheels that were installed from pip.
#######################################
remove_pip_nvidia_packages() {
  printf '\n%s\n' "🗑️ Rimozione dipendenze NVIDIA TensorFlow conflittuali..."

  local -a packages=(
    nvidia-cudnn-cu12
    nvidia-cuda-nvrtc-cu12
    nvidia-cuda-nvcc-cu12
    nvidia-cusparse-cu12
    nvidia-nvjitlink-cu12
    nvidia-cuda-cupti-cu12
    nvidia-cuda-runtime-cu12
    nvidia-cusolver-cu12
    nvidia-curand-cu12
    nvidia-cublas-cu12
    nvidia-cufft-cu12
    nvidia-nccl-cu12
  )

  # Best effort: packages that are not installed are not an error.
  pip3 uninstall -y "${packages[@]}" 2>/dev/null || true
}

#######################################
# Step 3: report on libcudart and make sure the other critical libraries
# are present in TARGET_DIR.
#######################################
link_critical_libraries() {
  printf '\n%s\n' "🔗 Verifica e creazione symlink librerie critiche..."

  echo "Controllo librerie critiche:"

  # libcudart (already present): only reported, never installed here.
  if [[ -f "${TARGET_DIR}/libcudart.so" ]]; then
    echo "✅ libcudart.so: OK"
  else
    echo "❌ libcudart.so: MANCANTE"
  fi

  # Libraries critical for TensorFlow: <name> <soname major version>.
  ensure_cuda_lib cublas 12
  ensure_cuda_lib cufft 11
  ensure_cuda_lib curand 10
  ensure_cuda_lib cusolver 11
  ensure_cuda_lib cusparse 12
}

#######################################
# Step 4: install the CUDA libraries from dnf, excluding the driver packages.
#######################################
install_cuda_libraries() {
  printf '\n%s\n' "📦 Installazione librerie CUDA complete..."

  # Globs are quoted so that dnf, not the shell, expands them.
  # Best effort: an unavailable package must not abort the script.
  sudo dnf install -y \
    cuda-libraries-12-2 \
    cuda-runtime-12-2 \
    'libcublas-12-*' \
    'libcufft-12-*' \
    'libcurand-12-*' \
    'libcusolver-12-*' \
    'libcusparse-12-*' \
    --exclude="nvidia-driver*,xorg-x11-drv-nvidia*" || true
}

#######################################
# Step 5: refresh the dynamic linker cache.
#######################################
refresh_linker_cache() {
  printf '\n%s\n' "🔄 Aggiornamento ldconfig finale..."
  sudo ldconfig
}

#######################################
# Step 6: reinstall TensorFlow without its automatic (CUDA) dependencies and
# then install the non-CUDA dependencies explicitly.
#######################################
reinstall_tensorflow() {
  printf '\n%s\n' "🤖 Reinstallazione TensorFlow senza dipendenze NVIDIA..."

  # Remove TensorFlow; best effort because it may not be installed.
  pip3 uninstall -y tensorflow tensorflow-io-gcs-filesystem 2>/dev/null || true

  # Install the base TensorFlow package without automatic CUDA dependencies.
  pip3 install --no-deps "tensorflow==${TF_VERSION}"
  pip3 install --no-deps tensorflow-io-gcs-filesystem

  # Only the required (non-CUDA) dependencies.
  # NOTE(review): the version bounds below mirror the requirements published
  # in the tensorflow 2.16.1 package metadata (numpy 2.x in particular is not
  # supported by that release) - confirm them when TF_VERSION changes.
  local -a python_deps=(
    absl-py
    astunparse
    flatbuffers
    gast
    google-pasta
    grpcio
    h5py
    'keras>=3.0.0'
    libclang
    'ml-dtypes~=0.3.1'
    'numpy>=1.23.5,<2.0.0'
    opt-einsum
    packaging
    'protobuf>=3.20.3,<5'
    requests
    setuptools
    six
    'tensorboard>=2.16,<2.17'
    termcolor
    typing-extensions
    wrapt
  )
  pip3 install "${python_deps[@]}"
}

#######################################
# Step 7: print the library state and run the verbose TensorFlow GPU test.
#######################################
run_gpu_smoke_test() {
  printf '\n%s\n' "🧪 TEST TENSORFLOW FINALE CON LOGGING COMPLETO..."

  echo "📋 Stato finale librerie:"
  echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-}"
  # ls fails when one of the patterns has no match and may receive SIGPIPE
  # from head; neither must abort the script under pipefail.
  ls -la "${TARGET_DIR}"/lib{cudart,cudnn,cublas,cufft,curand,cusolver,cusparse}* \
    2>/dev/null | head -20 || true

  printf '\n%s\n' "🚀 TEST TENSORFLOW GPU CON LOGGING DETTAGLIATO:"

  # LD_DEBUG=libs is set for this interpreter only, so the loader trace does
  # not leak into unrelated commands such as ls or the final check.
  LD_DEBUG=libs python3 - <<'PY'
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

print('=== TENSORFLOW GPU TEST ===')
print('Python path:', sys.path[:3])

try:
    import tensorflow as tf
    print('TensorFlow importato:', tf.__version__)
    print('CUDA built:', tf.test.is_built_with_cuda())
    print('GPU built:', tf.test.is_built_with_gpu_support())

    # Force CUDA initialisation
    print('Inizializzazione GPU...')
    gpus = tf.config.list_physical_devices('GPU')
    print(f'GPU fisiche trovate: {len(gpus)}')

    if gpus:
        print('GPU trovata:', gpus[0])

        # Minimal GPU test
        with tf.device('/GPU:0'):
            a = tf.constant([1.0, 2.0])
            b = tf.constant([3.0, 4.0])
            c = tf.add(a, b)
            print('✅ Test GPU riuscito:', c.numpy())
    else:
        print('❌ Nessuna GPU disponibile')

        # Library debugging
        print('\nDEBUG: Lista tutti device fisici:')
        all_devices = tf.config.list_physical_devices()
        for device in all_devices:
            print(f' {device}')

except Exception as e:
    print(f'❌ Errore TensorFlow: {e}')
    import traceback
    traceback.print_exc()
PY
}

#######################################
# Print the closing banner and a verdict based on the number of GPUs that
# TensorFlow reports.
#######################################
report_final_status() {
  printf '\n%s\n' "✅ CONFIGURAZIONE TENSORFLOW FINALE COMPLETATA!"
  echo "=================================================="

  # Parse the printed device count instead of grepping for the character
  # "1", which would also match unrelated output.
  local gpu_count
  gpu_count=$(
    python3 -c "import tensorflow as tf; print(len(tf.config.list_physical_devices('GPU')))" \
      2>/dev/null | tail -n 1
  ) || gpu_count=0

  if [[ "${gpu_count}" =~ ^[1-9][0-9]*$ ]]; then
    echo "🎉 SUCCESS: GPU TESLA M60 RILEVATA!"
    echo "🚀 Sistema pronto per DDoS Detection v04 GPU"
  else
    echo "⚠️ GPU non ancora rilevata - verifica manuale necessaria"
    echo "💡 Prossimo: riavvio sistema o check driver"
  fi
}

#######################################
# Entry point: run every step in order.
#######################################
main() {
  echo "🔧 FIX TENSORFLOW FINALE - RISOLUZIONE CONFLITTI NVIDIA"
  echo "======================================================="

  diagnose_missing_libraries
  remove_pip_nvidia_packages
  link_critical_libraries
  install_cuda_libraries
  refresh_linker_cache
  reinstall_tensorflow
  run_gpu_smoke_test
  report_final_status
}

main "$@"