Skip to content

Commit

Permalink
Use forced-inlining to squeeze better performance out of the 'einsum'…
Browse files Browse the repository at this point in the history
… version of the gravity derivatives.
  • Loading branch information
JacksonCampolattaro committed Jun 16, 2024
1 parent 147be89 commit 2598b56
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 25 deletions.
12 changes: 6 additions & 6 deletions benchmarks/multipoleMoment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ TEST_CASE("benchmark: Gravity derivatives construction", "[Gravity]") {
CHECK((gravity::D<3>(R) - gravity::derivative<3>(R)).norm() < 1e-7);
CHECK((gravity::D<4>(R) - gravity::derivative<4>(R)).norm() < 1e-7);

// BENCHMARK("D' Construction") { return gravity::D<1>(R); };
// BENCHMARK("D' Construction *") { return gravity::derivative<1>(R); };
// BENCHMARK("D'' Construction") { return gravity::D<2>(R); };
// BENCHMARK("D'' Construction *") { return gravity::derivative<2>(R); };
// BENCHMARK("D''' Construction") { return gravity::D<3>(R); };
// BENCHMARK("D''' Construction *") { return gravity::derivative<3>(R); };
BENCHMARK("D' Construction") { return gravity::D<1>(R); };
BENCHMARK("D' Construction *") { return gravity::derivative<1>(R); };
BENCHMARK("D'' Construction") { return gravity::D<2>(R); };
BENCHMARK("D'' Construction *") { return gravity::derivative<2>(R); };
BENCHMARK("D''' Construction") { return gravity::D<3>(R); };
BENCHMARK("D''' Construction *") { return gravity::derivative<3>(R); };
BENCHMARK("D'''' Construction") { return gravity::D<4>(R); };
BENCHMARK("D'''' Construction *") { return gravity::derivative<4>(R); };
}
2 changes: 1 addition & 1 deletion include/symtensor/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ namespace symtensor {
if (R == 1) return static_cast<std::size_t>(dimensionalIndices[0]) - lowestIndex;

// Ensure the indices are in the canonical order
std::sort(dimensionalIndices.begin(), dimensionalIndices.end());
dimensionalIndices = sorted(dimensionalIndices);

if (dimensionalIndices[0] == I{lowestIndex}) {
// If the first index is X, then we know we are in the first portion of the range,
Expand Down
5 changes: 3 additions & 2 deletions include/symtensor/SymmetricTensorBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,16 @@ namespace symtensor {
* @return a symmetric tensor with each value set by evaluating the function.
*/
template<typename F>
[[clang::always_inline, gnu::always_inline]]
inline static constexpr Implementation NullaryExpression(F function = {}) {
if constexpr (requires { function.template operator()<dimensionalIndices(0)>(); }) {
// If a function provides a template parameter for compile-time indexing, prefer that
return [&]<std::size_t... i>(std::index_sequence<i...>) constexpr {
return [&]<std::size_t... i>(std::index_sequence<i...>) __attribute__((always_inline)) constexpr {
return Implementation{static_cast<S>(function.template operator()<dimensionalIndices(i)>())...};
}(std::make_index_sequence<NumUniqueValues>());
} else {
// Otherwise, the function must take the indices as its only argument
return [&]<std::size_t... i>(std::index_sequence<i...>) constexpr {
return [&]<std::size_t... i>(std::index_sequence<i...>) __attribute__((always_inline)) constexpr {
return Implementation{static_cast<S>(function(dimensionalIndices(i)))...};
}(std::make_index_sequence<NumUniqueValues>());
}
Expand Down
44 changes: 28 additions & 16 deletions include/symtensor/gravity.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

namespace symtensor::gravity {



template<auto index, indexable Vector>
inline constexpr auto product_of_elements(const Vector &v) {
// todo: maybe don't use std::apply for this?
Expand All @@ -31,12 +33,19 @@ namespace symtensor::gravity {
}

template<auto index, std::size_t N, indexable Vector>
inline auto derivative_at(const Vector &R, const auto &g) {
[[clang::always_inline, gnu::always_inline]] inline auto derivative_at(const Vector &R) {

auto r = glm::length(R);
auto g1 = -1.0f / pow<3>(r);
auto g2 = 3.0f / pow<5>(r);
auto g3 = -15.0f / pow<7>(r);
auto g4 = 105.0f / pow<9>(r);

auto cartesian_product_term = product_of_elements<index>(R);
if constexpr (N == 1) {
return R[static_cast<std::size_t>(index[0])] * g[1];
return R[static_cast<std::size_t>(index[0])] * g1;
} else if constexpr (N == 2) {
return g[1] * kronecker_delta<float>(index) + g[2] * cartesian_product_term;
return g1 * kronecker_delta<float>(index) + g2 * cartesian_product_term;
} else if constexpr (N == 3) {
constexpr auto partitions = binomial_partitions<1>(index);
auto kronecker_product_term = [&]<auto... i>(std::index_sequence<i...>) constexpr {
Expand All @@ -53,9 +62,9 @@ namespace symtensor::gravity {
// R[static_cast<std::size_t>(std::get<0>(partition)[0])] : 0
// ) + ...);
// }, partitions);
return g[2] * kronecker_product_term + g[3] * cartesian_product_term;
return g2 * kronecker_product_term + g3 * cartesian_product_term;
} else if constexpr (N == 4) {
auto result = g[4] * cartesian_product_term;
auto result = g4 * cartesian_product_term;
constexpr auto partitions = binomial_partitions<2>(index);
{
constexpr auto kronecker_term = [&]<auto... i>(std::index_sequence<i...>) constexpr {
Expand All @@ -65,7 +74,7 @@ namespace symtensor::gravity {
) + ...);
}(std::make_index_sequence<partitions.size() / 2>());
if constexpr (kronecker_term > 0)
result += g[2] * kronecker_term;
result += g2 * kronecker_term;
}
{
// constexpr auto nonzero_index_pairs = filter<partitions, [](auto p) constexpr {
Expand Down Expand Up @@ -93,7 +102,7 @@ namespace symtensor::gravity {
) + ...);
}(std::make_index_sequence<partitions.size()>());
if constexpr (kronecker_product_is_nonzero)
result += g[3] * kronecker_product_term;
result += g3 * kronecker_product_term;
}

return result;
Expand All @@ -102,16 +111,10 @@ namespace symtensor::gravity {
}

template<std::size_t N, indexable Vector>
[[clang::always_inline, gnu::always_inline]]
inline auto derivative(const Vector &R) {
auto r = glm::length(R);
// todo: factor these out into their own function
auto g1 = -1.0f / pow<3>(r);
auto g2 = 3.0f / pow<5>(r);
auto g3 = -15.0f / pow<7>(r);
auto g4 = 105.0f / pow<9>(r);
auto g_coeffs = std::array<decltype(r), 5>{1.0, g1, g2, g3, g4};
return SymmetricTensor3f<N>::NullaryExpression([&]<auto index>() constexpr {
return derivative_at<index, N>(R, g_coeffs);
return SymmetricTensor3f<N>::NullaryExpression([&]<auto index>() __attribute__((always_inline)) constexpr {
return derivative_at<index, N>(R);
});

};
Expand Down Expand Up @@ -199,6 +202,15 @@ namespace symtensor::gravity {
}
}


auto derivative4(const glm::vec3 &R) {
return derivative<4>(R);
}

auto D4(const glm::vec3 &R) {
return D<4>(R);
}

}

#endif //SYMTENSOR_GRAVITY_H
30 changes: 30 additions & 0 deletions include/symtensor/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ namespace symtensor {
}
}

template<std::size_t N, typename T>
inline constexpr std::array<T, N> sorted(std::array<T, N> array) {
std::sort(array.begin(), array.end());
return array;
}

template<typename T, std::size_t NA, std::size_t NB>
constexpr std::array<T, NA + NB> concatenate(const std::array<T, NA> &a, const std::array<T, NB> &b) {
std::array<T, NA + NB> result{};
Expand Down Expand Up @@ -271,6 +277,8 @@ namespace symtensor {
else
*(b_out++) = set[i];
}
get<0>(*pair_out) = sorted(get<0>(*pair_out));
get<1>(*pair_out) = sorted(get<1>(*pair_out));

pair_out++;
} while (std::prev_permutation(selection.begin(), selection.end()));
Expand Down Expand Up @@ -371,6 +379,28 @@ namespace symtensor {
return sum;
}

template<typename T, std::size_t N>
constexpr std::array<std::size_t, N> countAppearances(const std::array<T, N> &arr) {
std::array<std::size_t, N> counts = {};
for (std::size_t i = 0; i < N; ++i) {
for (std::size_t j = 0; j < N; ++j) {
if (arr[i] == arr[j]) {
++counts[i];
}
}
}
return counts;
}

template<typename T, std::size_t N>
constexpr std::size_t numUniquePermutations(const std::array<T, N> &arr) {
const auto counts = countOccurrences(arr);
unsigned long long divisor = 1;
for (std::size_t i = 0; i < N; ++i)
divisor *= factorial(counts[i]);
return factorial(N) / divisor;
}

template<typename F, typename T, std::size_t N>
inline constexpr auto deduplicated_sum(const std::array<T, N> &values, F function = {}) {
// todo: This may be very important for good performance!
Expand Down

0 comments on commit 2598b56

Please sign in to comment.