Skip to content
Open
13 changes: 10 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ exit 0
target_link_options(${a_target} PRIVATE -fopenmp)
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS})
endif()
endif()

Expand Down Expand Up @@ -710,14 +710,15 @@ exit 0
PRIVATE -DFRONTIER_UNIFIED)
endif()

find_library(HIP_LIB amdhip64
find_library(HIP_LIB amdhip64
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
target_include_directories(${a_target} PRIVATE
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
target_link_libraries(${a_target} PRIVATE
${HIP_LIB} ${HIPFORT_AMDGCN_LIB} flang_rt.hostdevice)
${HIP_LIB} ${HIPFORT_AMDGCN_LIB})

endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
Expand Down Expand Up @@ -790,6 +791,12 @@ if (MFC_POST_PROCESS)

# -O0 is in response to https://github.com/MFlowCode/MFC-develop/issues/95
target_compile_options(post_process PRIVATE -O0)

# Link post_process with -no-pie when the Fortran compiler is LLVM Flang.
#
# flang-23/LLD defaults to PIE; SILO and LAPACK static libs on Frontier are
# non-PIC, producing R_X86_64_32 relocations that LLD rejects in PIE mode.
# The flag is PRIVATE to post_process and is not added for any other
# compiler ID.
if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
target_link_options(post_process PRIVATE -no-pie)
endif()
endif()

if (MFC_SYSCHECK)
Expand Down
2 changes: 1 addition & 1 deletion examples/3D_performance_test/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
json.dumps(
{
# Logistics
"run_time_info": "T",
"run_time_info": "F",
# Computational Domain Parameters
"x_domain%beg": 0.0e00,
"x_domain%end": 4.0e-03 / 1.0e-03,
Expand Down
2 changes: 1 addition & 1 deletion src/common/m_chemistry.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ contains

$:GPU_UPDATE(device='[isc1, isc2, isc3]')

if (chemistry .or. dummy) then
if (chemistry) then
! Set offsets based on direction using array indexing
offsets = 0
offsets(idir) = 1
Expand Down
2 changes: 0 additions & 2 deletions src/post_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ module m_global_parameters
logical :: E_wrt
logical, dimension(num_fluids_max) :: alpha_rho_e_wrt
logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
logical :: pres_wrt
logical, dimension(num_fluids_max) :: alpha_wrt
logical :: gamma_wrt
Expand Down Expand Up @@ -397,7 +396,6 @@ contains
file_per_process = .false.
E_wrt = .false.
fft_wrt = .false.
dummy = .false.
pres_wrt = .false.
alpha_wrt = .false.
gamma_wrt = .false.
Expand Down
2 changes: 0 additions & 2 deletions src/pre_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ module m_global_parameters
real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D)
integer :: buff_size !< Number of ghost cells for boundary condition storage
logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug

contains

Expand Down Expand Up @@ -303,7 +302,6 @@ contains
elliptic_smoothing = .false.

fft_wrt = .false.
dummy = .false.

simplex_perturb = .false.
simplex_params%perturb_vel(:) = .false.
Expand Down
18 changes: 10 additions & 8 deletions src/simulation/m_acoustic_src.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,14 +454,16 @@ contains
call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials')
end if

$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
if (support(ai) >= 5) then
if (dim == 2) then
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
end if
if (dim == 3) then
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
if (count > 0) then
$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
if (support(ai) >= 5) then
if (dim == 2) then
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
end if
if (dim == 3) then
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
end if
end if
end if
end do
Expand Down
4 changes: 2 additions & 2 deletions src/simulation/m_cbc.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ contains
#:for CBC_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (cbc_dir == ${CBC_DIR}$ .and. recon_type == WENO_TYPE) then
! PI2 of flux_rs_vf and flux_src_rs_vf at j = 1/2
if (weno_order == 3 .or. dummy) then
if (weno_order == 3) then
call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, &
& is3, idwbuff(2)%beg, idwbuff(3)%beg)

Expand Down Expand Up @@ -557,7 +557,7 @@ contains
end if

! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2
if (weno_order == 5 .or. dummy) then
if (weno_order == 5) then
call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, &
& is3, idwbuff(2)%beg, idwbuff(3)%beg)

Expand Down
2 changes: 1 addition & 1 deletion src/simulation/m_data_output.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ contains
#:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]')
icfl_max_loc = maxval(icfl_sf)
#:endcall GPU_PARALLEL
if (viscous .or. dummy) then
if (viscous) then
#:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]')
vcfl_max_loc = maxval(vcfl_sf)
Rc_min_loc = minval(Rc_sf)
Expand Down
19 changes: 11 additions & 8 deletions src/simulation/m_fftw.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ module m_fftw

type(c_ptr) :: fwd_plan, bwd_plan
type(c_ptr) :: fftw_real_data, fftw_cmplx_data, fftw_fltr_cmplx_data
integer :: real_size, cmplx_size, x_size, batch_size, Nfq
integer :: real_size, cmplx_size, x_size, batch_size, Nfq, i2
real(c_double), pointer :: data_real(:) !< Real data
complex(c_double_complex), pointer :: data_cmplx(:) !< Complex data in Fourier space
complex(c_double_complex), pointer :: data_fltr_cmplx(:) !< Filtered complex data in Fourier space
#if defined(MFC_GPU)
$:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq]')
$:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq, i2]')

real(dp), allocatable, target :: data_real_gpu(:)
complex(dp), allocatable, target :: data_cmplx_gpu(:)
Expand Down Expand Up @@ -76,8 +76,8 @@ contains
allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))

gpu_fft_size(1) = real_size
iembed(1) = 0
oembed(1) = 0
iembed(1) = real_size
oembed(1) = cmplx_size
$:GPU_ENTER_DATA(copyin='[real_size, cmplx_size, x_size, sys_size, batch_size, Nfq]')
$:GPU_UPDATE(device='[real_size, cmplx_size, x_size, sys_size, batch_size]')
#else
Expand Down Expand Up @@ -189,6 +189,9 @@ contains
$:END_GPU_PARALLEL_LOOP()

do i = 1, fourier_rings
i2 = i
$:GPU_UPDATE(device='[i2]')

$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
Expand All @@ -199,11 +202,11 @@ contains
end do
$:END_GPU_PARALLEL_LOOP()

$:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 0, p
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i2, l)
end do
end do
end do
Expand Down Expand Up @@ -241,13 +244,13 @@ contains
#endif
#:endcall GPU_HOST_DATA

$:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 0, p
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k &
& - 1)*real_size*x_size)/real(real_size, dp)
q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
q_cons_vf(k)%sf(j, i2, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
end do
end do
end do
Expand Down
2 changes: 0 additions & 2 deletions src/simulation/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,6 @@ module m_global_parameters
$:GPU_DECLARE(create='[Bx0]')

logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
!> @name Continuum damage model parameters
!> @{!
real(wp) :: tau_star !< Stress threshold for continuum damage modeling
Expand Down Expand Up @@ -695,7 +694,6 @@ contains
#:endfor

fft_wrt = .false.
dummy = .false.

do j = 1, num_probes_max
acoustic(j)%pulse = dflt_int
Expand Down
2 changes: 1 addition & 1 deletion src/simulation/m_igr.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ contains

call s_populate_F_igr_buffers(bc_type, jac_sf)

if (igr_iter_solver == 1 .or. dummy) then ! Jacobi iteration
if (igr_iter_solver == 1) then ! Jacobi iteration
$:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
Expand Down
Loading
Loading