Running on clusters
CBP (AMD GPU)
git clone https://github.com/intel/llvm.git
python3 buildbot/configure.py --hip --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-opt="-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm" --cmake-gen "Unix Makefiles"
cd build
make -j all libsycldevice install
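Once the install finishes, the toolchain can be put on the PATH and sanity-checked. A minimal sketch, assuming the compiler was installed into a dpcpp_compiler directory as requested by the CMAKE_INSTALL_PREFIX above (adjust the path to where it actually landed); sycl-ls is the device-listing tool shipped with DPC++.
export DPCPP_HOME=/path/to/dpcpp_compiler   # adjust to the actual install location
export PATH=$DPCPP_HOME/bin:$PATH
export LD_LIBRARY_PATH=$DPCPP_HOME/lib:$LD_LIBRARY_PATH
# List the SYCL backends and devices visible to the runtime.
sycl-ls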
CBP (Nvidia GPU)
git clone https://github.com/intel/llvm.git
python3 buildbot/configure.py --cuda --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-gen "Unix Makefiles"
cd build
make -j all libsycldevice install
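To check that the CUDA backend works before building Shamrock, one can compile a trivial SYCL source against the nvptx64 target. A minimal sketch; test.cpp and the install path are placeholders, not part of the Shamrock tree.
export PATH=/path/to/dpcpp_compiler/bin:$PATH             # adjust to the install location
export LD_LIBRARY_PATH=/path/to/dpcpp_compiler/lib:$LD_LIBRARY_PATH
# Compile any small SYCL program for the CUDA backend and run it on the GPU.
clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda test.cpp -o sycl_test
./sycl_test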
Neowise
10 nodes, 10 CPUs (480 cores total), 8 GPUs per node.
Compiling OpenSYCL
This build currently seems to be broken due to GCC header issues.
module purge
module load llvm-amdgpu/5.2.0_gcc-10.4.0 rocm-cmake/5.2.0_gcc-10.4.0 rocm-opencl/5.2.0_gcc-10.4.0 rocm-openmp-extras/5.2.0_gcc-10.4.0 rocm-smi-lib/5.2.3_gcc-10.4.0 rocminfo/5.2.0_gcc-10.4.0 llvm
cd OpenSYCL
cmake \
-DWITH_ROCM_BACKEND=ON \
-DWITH_SSCP_COMPILER=OFF \
-DROCM_PATH=/opt/rocm \
-DCMAKE_INSTALL_PREFIX=../OpenSYCL_comp .
make -j install
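If the build goes through despite the header issue, the install can be sanity-checked before configuring Shamrock. A minimal sketch, assuming the prefix above placed the syclcc compiler wrapper under ../OpenSYCL_comp/bin (the exact path is an assumption).
# Check that the OpenSYCL compiler wrapper is present and usable.
../OpenSYCL_comp/bin/syclcc --version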
cd Shamrock
python3 buildbot/configure.py --gen make --tests --build release --outdir build --cxxpath ../OpenSYCL_comp --compiler opensycl --profile hip-gfx906
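The configure step only generates the build tree; the build itself mirrors the DPC++ workflow below. A minimal sketch, assuming --outdir build created a build/ directory inside the Shamrock tree.
# Enter the generated build directory and compile.
cd build
make -j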
Compiling DPC++
The configure step is the same as on the CBP; after configuring, build and install from the build directory with make -j all libsycldevice install as shown above.
module load hip
module load openmpi
python3 buildbot/configure.py --hip --cmake-opt="-DCMAKE_INSTALL_PREFIX=../../dpcpp_compiler" --cmake-opt="-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm" --cmake-gen "Unix Makefiles"
export DPCPP_HOME=$(pwd)/dpcpp_compiler
export PATH=$DPCPP_HOME/bin:$PATH
export LD_LIBRARY_PATH=$DPCPP_HOME/lib:$LD_LIBRARY_PATH
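A quick check that the exported DPC++ toolchain is the one that will be picked up:
# Should resolve to $DPCPP_HOME/bin/clang++.
which clang++
clang++ --version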
tar -xvf Shamrock.tar.gz
cd Shamrock
python3 buildbot/configure.py --gen make --tests --build release --builddir dpcpp_rocm --cxxpath ../llvm/build --compiler intel_llvm --profile hip-gfx906 --cxxflags="--rocm-path=/opt/rocm"
cd dpcpp_rocm
make -j
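Before launching a multi-node job, the binary can be smoke-tested from the build directory. A sketch under the assumption that shamrock accepts a single-process launch without mpirun; the flags are the same as in the full runs below.
# Single-process smoke test on one GPU (assumption: one rank, no mpirun).
./shamrock --sycl-cfg auto:HIP --loglevel 1 --rscript ../exemples/spherical_wave.py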
Running the code with 8 GPUs per node:
module load hip
module load openmpi
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi --rscript ../exemples/spherical_wave.py
The same run can also be launched with the UCX PML and the ROCm-aware transports enabled:
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 --mca pml ucx -x UCX_TLS=self,sm,rocm -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi --rscript ../exemples/spherical_wave.py
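To avoid retyping the environment setup for every job, the launch line can be wrapped in a small script. The file name run_shamrock.sh is just an example; the contents are copied from the commands above.
#!/bin/bash
# run_shamrock.sh -- example wrapper around the UCX launch line above.
module load hip
module load openmpi
$(which mpirun) -machinefile $OAR_NODEFILE -npernode 8 --mca pml ucx -x UCX_TLS=self,sm,rocm \
    -x PATH=~/dpcpp_compiler/bin:$PATH -x LD_LIBRARY_PATH=~/dpcpp_compiler/lib:$LD_LIBRARY_PATH \
    ./shamrock --sycl-cfg auto:HIP --loglevel 1 --sycl-ls-map --benchmark-mpi \
    --rscript ../exemples/spherical_wave.py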
Example OAR reservations:
oarsub -t exotic -p neowise -l gpu=1 -I
oarsub -t exotic -p neowise -l gpu=16,walltime=0:30 -I
oarsub -t exotic -p neowise -l gpu=32,walltime=0:30 -I
oarsub -t exotic -p neowise -l gpu=64,walltime=0:30 -I
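The same reservations can also be submitted non-interactively by giving oarsub a command instead of -I; here run_shamrock.sh refers to the example wrapper above and must be executable.
# Batch submission: OAR runs the script on the head node of the reservation.
chmod +x run_shamrock.sh
oarsub -t exotic -p neowise -l gpu=16,walltime=0:30 ./run_shamrock.sh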