Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
847fdee
improve: Intel GPU Max (Ponte Vecchio) OpenMP target offload support
sbryngelson May 15, 2026
bf3e485
fix: ifx SPIR64 inliner ICE in m_compute_levelset (split loops + -fno…
sbryngelson May 15, 2026
bbb6ab7
docs: chemistry/pyrometheus Intel GPU verification and warning note
sbryngelson May 15, 2026
da087b0
fix: ifx SPIR64 InvalidArraySize — assumed-shape + local VLA fixes fo…
sbryngelson May 15, 2026
c307268
fix: ifx SPIR64 Level Zero link failures for Intel GPU Max (Ponte Vec…
sbryngelson May 15, 2026
5dba4e1
perf: Intel GPU Max Level Zero tuning in run toolchain
sbryngelson May 15, 2026
6e636de
fix: restore ifort (Intel classic) CPU build support in CMakeLists.txt
sbryngelson May 15, 2026
38762fc
feat: add GT CRNCH RoboGator (crnch) module entry for Intel GPU Max
sbryngelson May 15, 2026
c224384
fix: find clang-offload-bundler via ifx PATH when FC=mpiifx
sbryngelson May 15, 2026
fae09c2
fix: LAPACK PIE link error + crnch module Intel MPI paths
sbryngelson May 15, 2026
5ffc5f6
fix: LAPACK FortranCInterface PIE link failure on Ubuntu 22.04 with ifx
sbryngelson May 15, 2026
7c304d8
fix: build FFTW from source for all non-Cray compilers including Inte…
sbryngelson May 15, 2026
661e89c
Merge remote-tracking branch 'origin/master' into intel-gpu
sbryngelson May 15, 2026
3af3b6d
fix: crnch module — add I_MPI_FABRICS=shm for Intel MPI on single-node
sbryngelson May 15, 2026
6b1d0de
feat: Intel GPU Max (Ponte Vecchio) OpenMP target offload support
sbryngelson May 18, 2026
8d2c6b1
fix: replace integer kind literals with real literals in m_fftw.fpp
sbryngelson May 18, 2026
e5728fb
fix: extend VLA guards from USING_AMD to (USING_AMD or USING_INTEL) i…
sbryngelson May 18, 2026
0534d69
fix: use shm:ofi + FI_PROVIDER=tcp for Intel MPI on crnch-gpu (tcp fa…
sbryngelson May 18, 2026
8863022
docs: document Intel MPI multi-node SSH bootstrap workaround for miss…
sbryngelson May 18, 2026
faa9bbb
fix: add FI_PROVIDER_PATH to crnch-gpu modules; document SLURM GRES a…
sbryngelson May 18, 2026
c296e30
docs: document inter-node MPI fix (FI_TCP_IFACE) and dash3 renderD128…
sbryngelson May 18, 2026
6bc4c57
Merge sbryngelson/intel-gpu: integrate evolved Intel GPU fixes with m…
sbryngelson May 18, 2026
1a02fc5
Merge branch 'master' into intel-gpu
sbryngelson May 19, 2026
a4de5f2
fix: add ACES cluster support and ifx SPIR64 ICE workarounds for Inte…
sbryngelson May 19, 2026
6940725
fix: remove -O0 workaround for m_rhs/m_time_steppers; verified -O3 co…
sbryngelson May 19, 2026
b874530
fix: upgrade ACES to iimpi/2025a (ifx 2025.1.1); remove ifx 2023.2 SP…
sbryngelson May 19, 2026
577f93f
fix: use CMake/3.31.3 and Python/3.13.1 with iimpi/2025a on ACES (mod…
sbryngelson May 19, 2026
007e84d
fix: suppress inlining for m_compute_levelset on ifx IntelLLVM+OpenMP…
sbryngelson May 19, 2026
94e4d4b
test: add ifx #5633 ICE reproducers (matmul in declare-target sub)
sbryngelson May 19, 2026
9e01d89
fix: replace matmul intrinsic with f_mv3 in m_compute_levelset, remov…
sbryngelson May 19, 2026
4cd3df0
build: enable AOT compilation for Intel GPU, add mem=32G for ocloc li…
sbryngelson May 19, 2026
dd460d7
feat: add ACES batch template and fix build_intel_gpu.sh to include p…
sbryngelson May 19, 2026
3d7fe1c
Merge remote-tracking branch 'origin/intel-gpu' into intel-gpu
sbryngelson May 19, 2026
98036e8
fix: implement MFC_Intel_AOT in CMakeLists.txt for spir64_gen AOT com…
sbryngelson May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 94 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ option(MFC_DOCUMENTATION "Build documentation" OFF
# Aggregate build switch and floating-point precision switches; each is a
# boolean cache option that defaults to OFF.
option(
    MFC_ALL
    "Build everything"
    OFF
)
option(
    MFC_SINGLE_PRECISION
    "Build single precision"
    OFF
)
option(
    MFC_MIXED_PRECISION
    "Build mixed precision"
    OFF
)

# Intel GPU offload mode: ON selects ahead-of-time compilation (spir64_gen),
# OFF keeps just-in-time compilation (spir64).
option(
    MFC_Intel_AOT
    "Build Intel GPU with AOT (spir64_gen) instead of JIT (spir64)"
    OFF
)

# Device name handed to ocloc when MFC_Intel_AOT is ON. This is a string
# cache entry (not an option) so it can be overridden with -D on the
# command line.
set(
    MFC_Intel_AOT_DEVICE
    "pvc"
    CACHE STRING
    "ocloc device target for Intel AOT compilation (e.g. pvc, 0xbda)"
)

if (MFC_ALL)
set(MFC_PRE_PROCESS ON FORCE)
Expand Down Expand Up @@ -244,6 +246,15 @@ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang")
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-free>)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(-g -Og -traceback -debug -check all)
elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug")
add_compile_options(-g -Og -traceback -check bounds)
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM")
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-free>
$<$<COMPILE_LANGUAGE:Fortran>:-fpp>)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(-g -Og -traceback -debug -check all)
elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug")
Expand Down Expand Up @@ -595,6 +606,73 @@ exit 0
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
target_link_libraries(${a_target} PRIVATE ${HIPFFT_LIB})
endif()
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM")
# Intel GPU: oneMKL DFTI with !$omp dispatch for GPU FFT.
# Requires MKLROOT to be set (via oneAPI module or env). The value is read
# once, at configure time; an unset *or empty* MKLROOT is rejected so that
# no path below silently degenerates to "/include/...".
set(_mkl_root "$ENV{MKLROOT}")
if ("${_mkl_root}" STREQUAL "")
    message(FATAL_ERROR "MKLROOT is not set. Load oneAPI MKL module before building.")
endif()

# Compile mkl_dfti_omp_offload.f90 in isolation with minimal flags.
# The file uses !$omp declare variant with need_device_addr (OpenMP 5.2)
# which requires the global -free -fpp flags to be absent so the
# compiler parses it in standard fixed/free detection mode only.
set(_mkl_omp_src "${_mkl_root}/include/mkl_dfti_omp_offload.f90")
if (NOT EXISTS "${_mkl_omp_src}")
    message(FATAL_ERROR "mkl_dfti_omp_offload.f90 not found in ${_mkl_root}/include")
endif()

# All generated artifacts (bundled object, host-only object, .mod files)
# live in a dedicated directory inside the build tree.
set(_mkl_omp_mod_dir  "${CMAKE_CURRENT_BINARY_DIR}/mkl_omp_mods")
set(_mkl_omp_obj      "${_mkl_omp_mod_dir}/mkl_dfti_omp_offload.o")
set(_mkl_omp_obj_host "${_mkl_omp_mod_dir}/mkl_dfti_omp_offload_host.o")
file(MAKE_DIRECTORY "${_mkl_omp_mod_dir}")

# Find clang-offload-bundler (in ifx's bin/compiler/ subdir).
# CMAKE_Fortran_COMPILER may be an MPI wrapper (mpiifx); resolve the
# underlying ifx from PATH so the HINTS point to the right directory.
find_program(_IFX_REAL ifx REQUIRED)
cmake_path(GET _IFX_REAL PARENT_PATH _ifx_bin)
find_program(CLANG_OFFLOAD_BUNDLER
    NAMES clang-offload-bundler
    HINTS "${_ifx_bin}/compiler" "${_ifx_bin}" "${_ifx_bin}-llvm"
    REQUIRED)

# Step 1: build the offload module. The command runs inside the module
# directory, so the compiler's .mod output lands next to the object file.
# Each argument is a single properly quoted CMake argument and VERBATIM is
# set, so paths containing spaces survive regardless of generator/shell.
add_custom_command(
    OUTPUT "${_mkl_omp_obj}"
           "${_mkl_omp_mod_dir}/mkl_dfti_omp_offload.mod"
    COMMAND "${CMAKE_Fortran_COMPILER}"
            -fiopenmp -fopenmp-targets=spir64
            -c "-I${_mkl_root}/include"
            "${_mkl_omp_src}"
            -o "${_mkl_omp_obj}"
    WORKING_DIRECTORY "${_mkl_omp_mod_dir}"
    DEPENDS "${_mkl_omp_src}"
    COMMENT "Compiling MKL OMP offload Fortran module (mkl_dfti_omp_offload)"
    VERBATIM
)

# Step 2: strip the SPIR-V device bundle so the linked object has only host code.
# The SPIR-V contains Import declarations for MKL SYCL DFT functions that
# the OpenMP Level Zero plugin cannot resolve, causing zeModuleDynamicLink
# failure. With host-only code, !$omp dispatch falls back to CPU for DFT.
add_custom_command(
    OUTPUT "${_mkl_omp_obj_host}"
    COMMAND "${CLANG_OFFLOAD_BUNDLER}"
            --unbundle --type=o
            --targets=host-x86_64-unknown-linux-gnu
            "--input=${_mkl_omp_obj}"
            "--output=${_mkl_omp_obj_host}"
    DEPENDS "${_mkl_omp_obj}"
    COMMENT "Stripping SPIR-V from MKL DFT object (host-only, fixes Level Zero link)"
    VERBATIM
)

# Drive both steps through a per-target custom target, make the consuming
# target wait for it, expose the MKL headers and generated .mod files, and
# link the host-only object directly.
add_custom_target(mkl_omp_offload_mod_${a_target}
    DEPENDS "${_mkl_omp_obj_host}")
add_dependencies(${a_target} mkl_omp_offload_mod_${a_target})
target_include_directories(${a_target} PRIVATE
    "${_mkl_root}/include" "${_mkl_omp_mod_dir}")
target_link_libraries(${a_target} PRIVATE "${_mkl_omp_obj_host}")

# Link MKL threading + core + SYCL DFT backend
target_link_options(${a_target} PRIVATE -qmkl=parallel)
# mkl_sycl_dft is the name in MKL >= 2023.2; older versions use monolithic mkl_sycl
find_library(MKL_SYCL_DFT NAMES mkl_sycl_dft mkl_sycl
    HINTS "${_mkl_root}/lib" "${_mkl_root}/lib/intel64" REQUIRED)
# The SYCL and OpenCL runtimes are searched in LIBRARY_PATH and in the lib
# directory that sits beside the resolved ifx bin directory.
find_library(SYCL_LIB sycl HINTS ENV LIBRARY_PATH "${_ifx_bin}/../lib" REQUIRED)
find_library(OPENCL_LIB OpenCL HINTS ENV LIBRARY_PATH "${_ifx_bin}/../lib" REQUIRED)
target_link_libraries(${a_target} PRIVATE ${MKL_SYCL_DFT} ${SYCL_LIB} ${OPENCL_LIB})
else()
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE hipfort::hipfft)
Expand Down Expand Up @@ -636,9 +714,22 @@ exit 0
target_compile_options(${a_target} PRIVATE "-mp=gpu" "-Minfo=mp")
target_link_options(${a_target} PRIVATE "-mp=gpu")
set_target_properties(${a_target} PROPERTIES Fortran_FLAGS "-mp=gpu -gpu=ccall")
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64)
target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64)
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM")
# Intel GPU: OpenMP target offload (Xe-HPC / Ponte Vecchio).
# GPU FFT uses oneMKL DFTI via the OpenMP dispatch construct.
#
# MFC_Intel_AOT chooses the offload target name: spir64_gen for AOT
# (native ISA produced through ocloc, which avoids JIT zeModuleCreate
# failures at runtime on Level Zero) or spir64 for JIT.
if (MFC_Intel_AOT)
    set(_intel_omp_target spir64_gen)
else()
    set(_intel_omp_target spir64)
endif()

# The same two flags go to both the compile and the link step.
target_compile_options(${a_target} PRIVATE
    -fiopenmp -fopenmp-targets=${_intel_omp_target})
target_link_options(${a_target} PRIVATE
    -fiopenmp -fopenmp-targets=${_intel_omp_target})

if (MFC_Intel_AOT)
    # AOT additionally names the ocloc device (MFC_Intel_AOT_DEVICE).
    # SHELL: prevents CMake deduplication and preserves the quoted
    # "-device <name>" as a single argument to -Xopenmp-target-backend.
    target_compile_options(${a_target} PRIVATE
        "SHELL:-Xopenmp-target-backend \"-device ${MFC_Intel_AOT_DEVICE}\"")
    target_link_options(${a_target} PRIVATE
        "SHELL:-Xopenmp-target-backend \"-device ${MFC_Intel_AOT_DEVICE}\"")
endif()
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${a_target} PRIVATE -fopenmp)
target_link_options(${a_target} PRIVATE -fopenmp)
Expand Down
20 changes: 20 additions & 0 deletions build_intel_gpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Slurm batch job that builds the MFC pre_process and simulation targets
# via ./mfc.sh with "--gpu mp --intel-aot", on one node of the "pvc"
# partition with a single pvc GPU. stdout and stderr share one log file.
#SBATCH -p pvc
#SBATCH -N 1
#SBATCH --gres=gpu:pvc:1
#SBATCH -t 1:30:00
#SBATCH --mem=32G
#SBATCH -o /scratch/user/u.sb27915/MFC-intel/build_intel_gpu.log
#SBATCH -e /scratch/user/u.sb27915/MFC-intel/build_intel_gpu.log
#SBATCH -J mfc-intel-gpu-build

# Scratch locations used below. The #SBATCH lines above cannot expand shell
# variables, so the log paths there stay spelled out in full.
scratch_root=/scratch/user/u.sb27915
mfc_dir="${scratch_root}/MFC-intel"

# Batch shells may not have the module command defined; /etc/profile sets it up.
source /etc/profile
module load iimpi/2025a imkl/2025.1.0 CMake/3.31.3 Python/3.13.1

# Have the Intel MPI Fortran wrapper invoke ifx, and use that wrapper as FC.
export I_MPI_F90=ifx FC=mpif90

# Keep uv / rustup / cargo state under scratch.
export UV_CACHE_DIR="${scratch_root}/.cache/uv"
export RUSTUP_HOME="${scratch_root}/.rustup"
export CARGO_HOME="${scratch_root}/.cargo"
export PATH="${CARGO_HOME}/bin:${PATH}"

# Abort if the checkout is missing: otherwise ./mfc.sh would be looked up in
# whatever directory the job happened to start in.
cd "${mfc_dir}" || {
    echo "error: cannot cd to ${mfc_dir}" >&2
    exit 1
}
./mfc.sh build -t pre_process simulation --gpu mp --intel-aot -j 8
Loading
Loading