From 2598b56cfaec69eb7945d81ceac6f274a51b03a9 Mon Sep 17 00:00:00 2001 From: JacksonCampolattaro Date: Sun, 16 Jun 2024 19:41:39 +0200 Subject: [PATCH] Use forced-inlining to squeeze better performance out of the 'einsum' version of the gravity derivatives. --- benchmarks/multipoleMoment.cpp | 12 +++---- include/symtensor/Index.h | 2 +- include/symtensor/SymmetricTensorBase.h | 5 +-- include/symtensor/gravity.h | 44 ++++++++++++++++--------- include/symtensor/util.h | 30 +++++++++++++++++ 5 files changed, 68 insertions(+), 25 deletions(-) diff --git a/benchmarks/multipoleMoment.cpp b/benchmarks/multipoleMoment.cpp index 18aa461..b73ed97 100644 --- a/benchmarks/multipoleMoment.cpp +++ b/benchmarks/multipoleMoment.cpp @@ -29,12 +29,12 @@ TEST_CASE("benchmark: Gravity derivatives construction", "[Gravity]") { CHECK((gravity::D<3>(R) - gravity::derivative<3>(R)).norm() < 1e-7); CHECK((gravity::D<4>(R) - gravity::derivative<4>(R)).norm() < 1e-7); - // BENCHMARK("D' Construction") { return gravity::D<1>(R); }; - // BENCHMARK("D' Construction *") { return gravity::derivative<1>(R); }; - // BENCHMARK("D'' Construction") { return gravity::D<2>(R); }; - // BENCHMARK("D'' Construction *") { return gravity::derivative<2>(R); }; - // BENCHMARK("D''' Construction") { return gravity::D<3>(R); }; - // BENCHMARK("D''' Construction *") { return gravity::derivative<3>(R); }; + BENCHMARK("D' Construction") { return gravity::D<1>(R); }; + BENCHMARK("D' Construction *") { return gravity::derivative<1>(R); }; + BENCHMARK("D'' Construction") { return gravity::D<2>(R); }; + BENCHMARK("D'' Construction *") { return gravity::derivative<2>(R); }; + BENCHMARK("D''' Construction") { return gravity::D<3>(R); }; + BENCHMARK("D''' Construction *") { return gravity::derivative<3>(R); }; BENCHMARK("D'''' Construction") { return gravity::D<4>(R); }; BENCHMARK("D'''' Construction *") { return gravity::derivative<4>(R); }; } diff --git a/include/symtensor/Index.h b/include/symtensor/Index.h index c51edf9..16f8de7 100644 --- a/include/symtensor/Index.h +++ b/include/symtensor/Index.h @@ -238,7 +238,7 @@ namespace symtensor { if (R == 1) return static_cast(dimensionalIndices[0]) - lowestIndex; // Ensure the indices are in the canonical order - std::sort(dimensionalIndices.begin(), dimensionalIndices.end()); + dimensionalIndices = sorted(dimensionalIndices); if (dimensionalIndices[0] == I{lowestIndex}) { // If the first index is X, then we know we are in the first portion of the range, diff --git a/include/symtensor/SymmetricTensorBase.h b/include/symtensor/SymmetricTensorBase.h index 40477e9..27b7adb 100644 --- a/include/symtensor/SymmetricTensorBase.h +++ b/include/symtensor/SymmetricTensorBase.h @@ -215,15 +215,16 @@ namespace symtensor { * @return a symmetric tensor with each value set by evaluating the function. */ template + [[clang::always_inline, gnu::always_inline]] inline static constexpr Implementation NullaryExpression(F function = {}) { if constexpr (requires { function.template operator()(); }) { // If a function provides a template parameter for compile-time indexing, prefer that - return [&](std::index_sequence) constexpr { + return [&](std::index_sequence) __attribute__((always_inline)) constexpr { return Implementation{static_cast(function.template operator()())...}; }(std::make_index_sequence()); } else { // Otherwise, the function must take the indices as its only argument - return [&](std::index_sequence) constexpr { + return [&](std::index_sequence) __attribute__((always_inline)) constexpr { return Implementation{static_cast(function(dimensionalIndices(i)))...}; }(std::make_index_sequence()); } diff --git a/include/symtensor/gravity.h b/include/symtensor/gravity.h index 4eef151..10b7175 100644 --- a/include/symtensor/gravity.h +++ b/include/symtensor/gravity.h @@ -10,6 +10,8 @@ namespace symtensor::gravity { + + template inline constexpr auto product_of_elements(const Vector &v) { // todo: maybe don't use std::apply for this? @@ -31,12 +33,19 @@ namespace symtensor::gravity { } template - inline auto derivative_at(const Vector &R, const auto &g) { + [[clang::always_inline, gnu::always_inline]] inline auto derivative_at(const Vector &R) { + + auto r = glm::length(R); + auto g1 = -1.0f / pow<3>(r); + auto g2 = 3.0f / pow<5>(r); + auto g3 = -15.0f / pow<7>(r); + auto g4 = 105.0f / pow<9>(r); + auto cartesian_product_term = product_of_elements(R); if constexpr (N == 1) { - return R[static_cast(index[0])] * g[1]; + return R[static_cast(index[0])] * g1; } else if constexpr (N == 2) { - return g[1] * kronecker_delta(index) + g[2] * cartesian_product_term; + return g1 * kronecker_delta(index) + g2 * cartesian_product_term; } else if constexpr (N == 3) { constexpr auto partitions = binomial_partitions<1>(index); auto kronecker_product_term = [&](std::index_sequence) constexpr { @@ -53,9 +62,9 @@ namespace symtensor::gravity { // R[static_cast(std::get<0>(partition)[0])] : 0 // ) + ...); // }, partitions); - return g[2] * kronecker_product_term + g[3] * cartesian_product_term; + return g2 * kronecker_product_term + g3 * cartesian_product_term; } else if constexpr (N == 4) { - auto result = g[4] * cartesian_product_term; + auto result = g4 * cartesian_product_term; constexpr auto partitions = binomial_partitions<2>(index); { constexpr auto kronecker_term = [&](std::index_sequence) constexpr { @@ -65,7 +74,7 @@ namespace symtensor::gravity { ) + ...); }(std::make_index_sequence()); if constexpr (kronecker_term > 0) - result += g[2] * kronecker_term; + result += g2 * kronecker_term; } { // constexpr auto nonzero_index_pairs = filter()); if constexpr (kronecker_product_is_nonzero) - result += g[3] * kronecker_product_term; + result += g3 * kronecker_product_term; } return result; @@ -102,16 +111,10 @@ namespace symtensor::gravity { } template + [[clang::always_inline, gnu::always_inline]] inline auto derivative(const Vector &R) { - auto r = glm::length(R); - // todo: factor these out into their own function - auto g1 = -1.0f / pow<3>(r); - auto g2 = 3.0f / pow<5>(r); - auto g3 = -15.0f / pow<7>(r); - auto g4 = 105.0f / pow<9>(r); - auto g_coeffs = std::array{1.0, g1, g2, g3, g4}; - return SymmetricTensor3f::NullaryExpression([&]() constexpr { - return derivative_at(R, g_coeffs); + return SymmetricTensor3f::NullaryExpression([&]() __attribute__((always_inline)) constexpr { + return derivative_at(R); }); }; @@ -199,6 +202,15 @@ namespace symtensor::gravity { } } + + auto derivative4(const glm::vec3 &R) { + return derivative<4>(R); + } + + auto D4(const glm::vec3 &R) { + return D<4>(R); + } + } #endif //SYMTENSOR_GRAVITY_H diff --git a/include/symtensor/util.h b/include/symtensor/util.h index f7426b9..e9935a2 100644 --- a/include/symtensor/util.h +++ b/include/symtensor/util.h @@ -85,6 +85,12 @@ namespace symtensor { } } + template + inline constexpr std::array sorted(std::array array) { + std::sort(array.begin(), array.end()); + return array; + } + template constexpr std::array concatenate(const std::array &a, const std::array &b) { std::array result{}; @@ -271,6 +277,8 @@ namespace symtensor { else *(b_out++) = set[i]; } + get<0>(*pair_out) = sorted(get<0>(*pair_out)); + get<1>(*pair_out) = sorted(get<1>(*pair_out)); pair_out++; } while (std::prev_permutation(selection.begin(), selection.end())); @@ -371,6 +379,28 @@ namespace symtensor { return sum; } + template + constexpr std::array countAppearances(const std::array &arr) { + std::array counts = {}; + for (std::size_t i = 0; i < N; ++i) { + for (std::size_t j = 0; j < N; ++j) { + if (arr[i] == arr[j]) { + ++counts[i]; + } + } + } + return counts; + } + + template + constexpr std::size_t numUniquePermutations(const std::array &arr) { + const auto counts = countOccurrences(arr); + unsigned long long divisor = 1; + for (std::size_t i = 0; i < N; ++i) + divisor *= factorial(counts[i]); + return factorial(N) / divisor; + } + template inline constexpr auto deduplicated_sum(const std::array &values, F function = {}) { // todo: This may be very important for good performance!