Skip to content
Open
6 changes: 3 additions & 3 deletions include/Definitions/geometry_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ inline void compute_jacobian_elements(const DomainGeometry& domain_geometry, dou
/* which is represented by: */
/* [arr, 0.5*art] */
/* [0.5*atr, att] */
arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / fabs(detDF);
att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / fabs(detDF);
art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / fabs(detDF);
arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / std::fabs(detDF);
att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / std::fabs(detDF);
art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / std::fabs(detDF);
/* Note that the inverse Jacobian matrix DF^{-1} is: */
/* 1.0 / det(DF) * */
/* [Jtt, -Jrt] */
Expand Down
44 changes: 0 additions & 44 deletions include/Definitions/global_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,50 +79,6 @@ enum class BetaCoeff
ALPHA_INVERSE = 1
};

/* ---------------------------- */
/* Mumps - Constant Definitions */
/* ---------------------------- */
#ifdef GMGPOLAR_USE_MUMPS
#include "dmumps_c.h"
/* Mumps inline functions s.t. indices match documentation */
inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.icntl[(I)-1];
}
inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.cntl[(I)-1];
}
inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.infog[(I)-1];
}

constexpr int USE_COMM_WORLD = -987654;
constexpr int PAR_NOT_PARALLEL = 0;
constexpr int PAR_PARALLEL = 1;

constexpr int JOB_INIT = -1;
constexpr int JOB_END = -2;
constexpr int JOB_REMOVE_SAVED_DATA = -3;
constexpr int JOB_FREE_INTERNAL_DATA = -4;
constexpr int JOB_SUPPRESS_OOC_FILES = -200;

constexpr int JOB_ANALYSIS_PHASE = 1;
constexpr int JOB_FACTORIZATION_PHASE = 2;
constexpr int JOB_COMPUTE_SOLUTION = 3;
constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4;
constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5;
constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6;
constexpr int JOB_SAVE_INTERNAL_DATA = 7;
constexpr int JOB_RESTORE_INTERNAL_DATA = 8;
constexpr int JOB_DISTRIBUTE_RHS = 9;

constexpr int SYM_UNSYMMETRIC = 0;
constexpr int SYM_POSITIVE_DEFINITE = 1;
constexpr int SYM_GENERAL_SYMMETRIC = 2;
#endif

// --------------------------------------- //
// Function-like macros for LIKWID markers //
// --------------------------------------- //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down Expand Up @@ -184,13 +184,13 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double bottom_value = -coeff3 * (att(center_index) + att(bottom_index)); /* Bottom */
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);
double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);

double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */
double top_right_value = -0.25 * (art(right_index) + art(top_index)); /* Top Right */
Expand Down Expand Up @@ -276,7 +276,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down Expand Up @@ -383,7 +383,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down
117 changes: 50 additions & 67 deletions include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -798,80 +798,63 @@ SparseMatrixCSR<double> DirectSolver_CSR_LU_Give<LevelCacheType>::buildSolverMat

SparseMatrixCSR<double> solver_matrix(n, n, nnz_per_row);

if (num_omp_threads == 1) {
/* Single-threaded execution */
for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
}
else {
/* Multi-threaded execution: For Loops */
const int num_circle_tasks = grid.numberSmootherCircles();
const int additional_radial_tasks = grid.ntheta() % 3;
const int num_radial_tasks = grid.ntheta() - additional_radial_tasks;

const int num_smoother_circles = grid.numberSmootherCircles();
const int additional_radial_tasks = grid.ntheta() % 3;
const int num_radial_tasks = grid.ntheta() - additional_radial_tasks;

/* ---------------- */
/* Circular section */
/* ---------------- */
// We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
#pragma omp parallel num_threads(num_omp_threads)
{
{
#pragma omp for
for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
Comment thread
EmilyBourne marked this conversation as resolved.
#pragma omp for
for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
#pragma omp for nowait
for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
}

/* ---------------- */
/* Radial section */
/* ---------------- */
// We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
// Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
// that are handled serially before the parallel loops.
if (additional_radial_tasks > 0) {
const int i_theta = 0;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}

if (additional_radial_tasks > 1) {
const int i_theta = 1;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}

#pragma omp parallel num_threads(num_omp_threads)
{
#pragma omp for
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 0) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
else {
if (additional_radial_tasks == 0) {
buildSolverMatrixRadialSection(0, solver_matrix);
}
else if (additional_radial_tasks >= 1) {
buildSolverMatrixRadialSection(0, solver_matrix);
buildSolverMatrixRadialSection(1, solver_matrix);
}
}
}
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 1) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
else {
if (additional_radial_tasks == 0) {
buildSolverMatrixRadialSection(1, solver_matrix);
}
else if (additional_radial_tasks == 1) {
buildSolverMatrixRadialSection(2, solver_matrix);
}
else if (additional_radial_tasks == 2) {
buildSolverMatrixRadialSection(2, solver_matrix);
buildSolverMatrixRadialSection(3, solver_matrix);
}
}
}
Comment thread
julianlitz marked this conversation as resolved.
for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
Comment thread
EmilyBourne marked this conversation as resolved.
#pragma omp for
for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
}
for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
}

return solver_matrix;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,6 @@ class DirectSolver_CSR_LU_Give : public DirectSolver<LevelCacheType>
-1, 0, -1,
-1, -1, -1
};
const Stencil stencil_next_inner_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
const Stencil stencil_next_outer_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
// clang-format on

SparseMatrixCSR<double> buildSolverMatrix();
Expand Down
39 changes: 11 additions & 28 deletions include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
int i_r, i_theta;
grid.multiIndex(global_index, i_r, i_theta);

const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
const int size_stencil_interior = 9;
const int size_stencil_next_outer_boundary = 9;
const int size_stencil_outer_boundary = 1;
const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_interior = 9;
const int size_stencil_outer_boundary = 1;

if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
if ((i_r > 0 && i_r < grid.nr() - 1)) {
Comment thread
julianlitz marked this conversation as resolved.
return size_stencil_interior;
}
else if (i_r == 0 && !DirBC_Interior) {
Expand All @@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
return size_stencil_outer_boundary;
}
else if (i_r == 1 && DirBC_Interior) {
return size_stencil_next_inner_boundary;
}
else if (i_r == grid.nr() - 2) {
return size_stencil_next_outer_boundary;
}

throw std::out_of_range("Invalid index for stencil");
}

Expand All @@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
assert(0 <= i_r && i_r < grid.nr());
assert(grid.nr() >= 4);

if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
if ((i_r > 0 && i_r < grid.nr() - 1)) {
return stencil_interior_;
}
else if (i_r == 0 && !DirBC_Interior) {
Expand All @@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
return stencil_DB_;
}
else if (i_r == 1 && DirBC_Interior) {
return stencil_next_inner_DB_;
}
else if (i_r == grid.nr() - 2) {
return stencil_next_outer_DB_;
}

throw std::out_of_range("Invalid index for stencil");
}

Expand All @@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getNonZeroCountSolverMatrix() cons
const PolarGrid& grid = DirectSolver<LevelCacheType>::grid_;
const bool DirBC_Interior = DirectSolver<LevelCacheType>::DirBC_Interior_;

const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
const int size_stencil_interior = 9;
const int size_stencil_next_outer_boundary = 9;
const int size_stencil_outer_boundary = 1;

assert(grid.nr() >= 4);
const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_interior = 9;
const int size_stencil_outer_boundary = 1;

return grid.ntheta() *
(size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior +
size_stencil_next_outer_boundary + size_stencil_outer_boundary);
(size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary);
}
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ void DirectSolver_CSR_LU_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double bottom_value = -coeff3 * (att[center_index] + att[bottom_index]); /* Bottom */
double top_value = -coeff4 * (att[center_index] + att[top_index]); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF[center_index]) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);
double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
std::fabs(detDF[center_index]) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);

double bottom_right_value = +0.25 * (art[right_index] + art[bottom_index]); /* Bottom Right */
double top_right_value = -0.25 * (art[right_index] + art[top_index]); /* Top Right */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver<LevelCacheType>
-1, 0, -1,
-1, -1, -1
};
const Stencil stencil_next_inner_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
const Stencil stencil_next_outer_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
// clang-format on

SparseMatrixCSR<double> buildSolverMatrix();
Expand Down
Loading
Loading