Running on clusters
CBP (AMD GPU)
git clone https://github.com/intel/llvm.git
python3 buildbot/configure.py --hip --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-opt="-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm" --cmake-gen "Unix Makefiles"
cd build
make -j all libsycldevice install
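Once the install finishes, the toolchain can be put on the PATH and sanity-checked. A minimal sketch, assuming the compiler was installed into a dpcpp_compiler directory as requested by the CMAKE_INSTALL_PREFIX above (adjust the path to where it actually landed); sycl-ls is the device-listing tool shipped with DPC++.
export DPCPP_HOME=/path/to/dpcpp_compiler   # adjust to the actual install location
export PATH=$DPCPP_HOME/bin:$PATH
export LD_LIBRARY_PATH=$DPCPP_HOME/lib:$LD_LIBRARY_PATH
# List the SYCL backends and devices visible to the runtime.
sycl-ls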
CBP (Nvidia GPU)
git clone https://github.com/intel/llvm.git
python3 buildbot/configure.py --cuda --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-gen "Unix Makefiles"
cd build
make -j all libsycldevice install
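To check that the CUDA backend works before building Shamrock, one can compile a trivial SYCL source against the nvptx64 target. A minimal sketch; test.cpp and the install path are placeholders, not part of the Shamrock tree.
export PATH=/path/to/dpcpp_compiler/bin:$PATH             # adjust to the install location
export LD_LIBRARY_PATH=/path/to/dpcpp_compiler/lib:$LD_LIBRARY_PATH
# Compile any small SYCL program for the CUDA backend and run it on the GPU.
clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda test.cpp -o sycl_test
./sycl_test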
Neowise
10 nodes, 10 CPUs (480 cores total), 8 GPUs per node.
Compiling OpenSYCL
This build currently seems to be broken due to GCC header issues.
module purge
module load llvm-amdgpu/5.2.0_gcc-10.4.0 rocm-cmake/5.2.0_gcc-10.4.0 rocm-opencl/5.2.0_gcc-10.4.0 rocm-openmp-extras/5.2.0_gcc-10.4.0 rocm-smi-lib/5.2.3_gcc-10.4.0 rocminfo/5.2.0_gcc-10.4.0 llvm
cd OpenSYCL
cmake \
-DWITH_ROCM_BACKEND=ON \
-DWITH_SSCP_COMPILER=OFF \
-DROCM_PATH=/opt/rocm \
-DCMAKE_INSTALL_PREFIX=../OpenSYCL_comp .
make -j install
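If the build goes through despite the header issue, the install can be sanity-checked before configuring Shamrock. A minimal sketch, assuming the prefix above placed the syclcc compiler wrapper under ../OpenSYCL_comp/bin (the exact path is an assumption).
# Check that the OpenSYCL compiler wrapper is present and usable.
../OpenSYCL_comp/bin/syclcc --version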
cd Shamrock
python3 buildbot/configure.py --gen make --tests --build release --outdir build --cxxpath ../OpenSYCL_comp --compiler opensycl --profile hip-gfx906
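The configure step only generates the build tree; the build itself mirrors the DPC++ workflow below. A minimal sketch, assuming --outdir build created a build/ directory inside the Shamrock tree.
# Enter the generated build directory and compile.
cd build
make -j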
Compiling DPC++
The configure step is the same as on the CBP; after configuring, build and install from the build directory with make -j all libsycldevice install as shown above.
module load hip
module load openmpi
python3 buildbot/configure.py --hip --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-opt="-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm" --cmake-gen "Unix Makefiles"
export DPCPP_HOME=$(pwd)/dpcpp_compiler
export PATH=$DPCPP_HOME/bin:$PATH
export LD_LIBRARY_PATH=$DPCPP_HOME/lib:$LD_LIBRARY_PATH
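A quick check that the exported DPC++ toolchain is the one that will be picked up:
# Should resolve to $DPCPP_HOME/bin/clang++.
which clang++
clang++ --version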
tar -xvf Shamrock.tar.gz
cd Shamrock
python3 buildbot/configure.py --gen make --tests --build release --builddir dpcpp_rocm --cxxpath ../llvm/build --compiler intel_llvm --profile hip-gfx906 --cxxflags="--rocm-path=/opt/rocm"
cd dpcpp_rocm
make -j
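Before launching a multi-node job, the binary can be smoke-tested from the build directory. A sketch under the assumption that shamrock accepts a single-process launch without mpirun; the flags are the same as in the full runs below.
# Single-process smoke test on one GPU (assumption: one rank, no mpirun).
./shamrock --sycl-cfg auto:HIP --loglevel 1 --rscript ../exemples/spherical_wave.py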
Running the code with 8 GPUs per node:
module load hip
module load openmpi
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi --rscript ../exemples/spherical_wave.py
The same run can also be launched with the UCX PML and the ROCm-aware transports enabled:
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 --mca pml ucx -x UCX_TLS=self,sm,rocm -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi --rscript ../exemples/spherical_wave.py
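To avoid retyping the environment setup for every job, the launch line can be wrapped in a small script. The file name run_shamrock.sh is just an example; the contents are copied from the commands above.
#!/bin/bash
# run_shamrock.sh -- example wrapper around the UCX launch line above.
module load hip
module load openmpi
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 --mca pml ucx -x UCX_TLS=self,sm,rocm \
    -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH \
    ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi \
    --rscript ../exemples/spherical_wave.py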
Example OAR reservations:
oarsub -t exotic -p neowise -l gpu=1 -I
oarsub -t exotic -p neowise -l gpu=16,walltime=0:30 -I
oarsub -t exotic -p neowise -l gpu=32,walltime=0:30 -I
oarsub -t exotic -p neowise -l gpu=64,walltime=0:30 -I
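The same reservations can also be submitted non-interactively by giving oarsub a command instead of -I; here run_shamrock.sh refers to the example wrapper above and must be executable.
# Batch submission: OAR runs the script on the head node of the reservation.
chmod +x run_shamrock.sh
oarsub -t exotic -p neowise -l gpu=16,walltime=0:30 ./run_shamrock.sh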