From df084bc392c2b337158f61d2ea7676dd0b47b8b3 Mon Sep 17 00:00:00 2001
From: Tomas Jira <tom.jira@gmail.com>
Date: Fri, 26 Jul 2024 15:53:24 +0200
Subject: [PATCH] contractions in MP theory now use torch

---
 CMakeLists.txt            |  42 +++++++------
 program/mp/CMakeLists.txt |   3 +-
 program/mp/src/main.cpp   | 129 ++++++++++++++++----------------------
 3 files changed, 81 insertions(+), 93 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ecde410..e7641ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,22 +36,24 @@ if (STATIC)
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -static")
 endif()
 
-if (WIN32)
+# declare Torch library for fast tensor contractions
+FetchContent_Declare(torch SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcpu.zip)
+FetchContent_Declare(libint SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://github.com/evaleev/libint/releases/download/v2.8.2/libint-2.8.2-mpqc4.tgz)
+FetchContent_Declare(eigen SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz)
+FetchContent_Declare(fftw SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://www.fftw.org/fftw-3.3.10.tar.gz)
+
+# download the Torch library
+FetchContent_MakeAvailable(torch)
 
+# add torch prefix path
+list(APPEND CMAKE_PREFIX_PATH ${torch_SOURCE_DIR}/share/cmake/Torch)
+
+if (WIN32)
     # set additional compiler flags for windows
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wa,-mbig-obj")
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wa,-mbig-obj")
 
-    # declare the libint library on github releases
-    FetchContent_Declare(libint SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://github.com/evaleev/libint/releases/download/v2.8.2/libint-2.8.2-mpqc4.tgz)
-
-    # declare the eigen release for linear algebra
-    FetchContent_Declare(eigen SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz)
-
-    # declare the fftw library on the fftw webpage
-    FetchContent_Declare(fftw SYSTEM DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL https://www.fftw.org/fftw-3.3.10.tar.gz)
-
-    # download these libraries
+    # download the eigen, fftw and libint library
     FetchContent_MakeAvailable(eigen fftw libint)
 
     # read the problematic libint file
@@ -75,6 +77,10 @@ include_directories(example/diagram include lib)
 # find the necessary packages
 find_package(Eigen3 REQUIRED)
 find_package(OpenMP REQUIRED)
+find_package(Torch  REQUIRED)
+
+# add torch compiler flags
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${TORCH_CXX_FLAGS}")
 
 # add the base library
 add_library(acorn_base STATIC src/expression.cpp src/linalg.cpp src/timer.cpp)
@@ -83,13 +89,13 @@ add_library(acorn_base STATIC src/expression.cpp src/linalg.cpp src/timer.cpp)
 target_link_libraries(acorn_base Eigen3::Eigen)
 
 # add subprograms
-add_subdirectory(program/expression)
-add_subdirectory(program/transform)
-add_subdirectory(program/integral)
-add_subdirectory(program/cdyn)
-add_subdirectory(program/qdyn)
-add_subdirectory(program/ci)
-add_subdirectory(program/hf)
+# add_subdirectory(program/expression)
+# add_subdirectory(program/transform)
+# add_subdirectory(program/integral)
+# add_subdirectory(program/cdyn)
+# add_subdirectory(program/qdyn)
+# add_subdirectory(program/ci)
+# add_subdirectory(program/hf)
 add_subdirectory(program/mp)
 
 # enable testing
diff --git a/program/mp/CMakeLists.txt b/program/mp/CMakeLists.txt
index 1191b7e..f414a4b 100644
--- a/program/mp/CMakeLists.txt
+++ b/program/mp/CMakeLists.txt
@@ -5,7 +5,8 @@ add_executable(acorn_mp src/main.cpp)
 target_include_directories(acorn_mp PRIVATE include)
 
 # link libraries
-target_link_libraries(acorn_mp acorn_base)
+target_link_libraries(acorn_mp acorn_base ${TORCH_LIBRARIES})
+
 # link OpenMP
 if(OpenMP_CXX_FOUND AND CMAKE_BUILD_TYPE STREQUAL "Release")
     target_link_libraries(acorn_mp OpenMP::OpenMP_CXX)
diff --git a/program/mp/src/main.cpp b/program/mp/src/main.cpp
index 6b0d8b8..4dab1e7 100644
--- a/program/mp/src/main.cpp
+++ b/program/mp/src/main.cpp
@@ -1,6 +1,9 @@
 #include "linalg.h"
 #include "timer.h"
 #include <argparse.hpp>
+#include <torch/torch.h>
+
+using namespace torch::indexing;
 
 #define TOSTRING(x) #x
 #define OCC "ijklmnop"
@@ -26,61 +29,11 @@ std::string {
 
 int nthread = 1;
 
-double loop(const std::vector<std::pair<Tensor<4>, std::vector<int>>>& Jcs, const std::pair<Vector, std::vector<std::vector<int>>>& Ecs, const std::vector<int> dims, std::vector<int> index = {}, size_t i = 0) {
-    size_t rank = dims.size(), nos = dims.front(), nvs = dims.back(); double E = 1, D = 0; if (i == 0) index.resize(rank);
-
-    if (i != rank) {
-        for (int j = 0; i < rank && j < dims.at(i); j++) {
-            index.at(i) = j, E += loop(Jcs, Ecs, dims, index, i + 1);
-        }
-    } else {
-        for (const std::pair<Tensor<4>, std::vector<int>>& contr : Jcs) {
-            E *= contr.first(index.at(contr.second.at(0)), index.at(contr.second.at(1)), index.at(contr.second.at(2)), index.at(contr.second.at(3)));
-        }
-        for (size_t j = 0; j < Ecs.second.size(); j++) {
-            for (size_t k = 0, l = Ecs.second.at(j).size(); k < l / 2; k++) D -= Ecs.first(nos + index.at(Ecs.second.at(j).at(k)));
-            for (size_t k = 0, l = Ecs.second.at(j).size(); k < l / 2; k++) D += Ecs.first(index.at(Ecs.second.at(j).at(l/2 + k)));
-            E /= D, D = 0;
-        }
-        return E;
-    }
-
-    return E - 1;
-}
-
 std::vector<std::string> split(const std::string &str, char delim) {
     std::vector<std::string> result; std::stringstream ss(str); std::string line;
     while (getline(ss, line, delim)) {result.push_back (line);} return result;
 }
 
-std::tuple<std::vector<std::pair<Tensor<4>, std::vector<int>>>, std::vector<std::vector<int>>, std::vector<int>> parse(const Tensor<4>& Jmsa, const std::string& contr, int order, int nos, int nvs) {
-    // resulting slices, contraction axes and dimension
-    std::vector<std::pair<Tensor<4>, std::vector<int>>> Jcs;
-    std::vector<std::vector<int>> Ecs; std::vector<int> dim;
-
-    // unique indices, index to axis map, and index counter
-    std::set<char> indices; std::map<char, int> i2a; int i = 0; Eigen::array<Eigen::Index, 4> si, sr;
-
-    std::vector<std::string> contrs = split(contr, '-');
-
-    // fill the unique indices
-    for (const char& c : contr) if (c != '-') indices.insert(c);
-
-    // fill the index to axis map
-    for (const char& index : indices) if (std::string(OCC).find(index) != std::string::npos) i2a[index] = i++, dim.push_back(nos);
-    for (const char& index : indices) if (std::string(VRT).find(index) != std::string::npos) i2a[index] = i++, dim.push_back(nvs);
-
-    // fill the slices and contraction axes
-    for (int j = 0; j < (int)contrs.size(); j++) {
-        std::vector<int> axis; for (const char& c : contrs.at(j)) axis.push_back(i2a[c]);
-        for (int k = 0; j < order && k < 4; k++) {
-            if (std::string(OCC).find(contrs.at(j).at(k)) != std::string::npos) {si[k] = 0, sr[k] = nos;} else {si[k] = nos, sr[k] = nvs;}
-        } if (j < order) Jcs.push_back({Jmsa.slice(si, sr), axis}); else Ecs.push_back(axis);
-    }
-
-    return {Jcs, Ecs, dim};
-}
-
 int main(int argc, char** argv) {
     argparse::ArgumentParser program("Acorn Moller-Plesset Pertrubation Theory Program", "1.0", argparse::default_arguments::none); Timer::Timepoint start = Timer::Now();
 
@@ -95,64 +48,92 @@ int main(int argc, char** argv) {
     } if (program.get<bool>("-h")) {std::cout << program.help().str(); exit(EXIT_SUCCESS);} Timer::Timepoint tp = Timer::Now(), mpit = Timer::Now();
 
     // load the system and integrals in MS basis from disk
-    MEASURE("SYSTEM AND INTEGRALS IN MS BASIS READING: ",
-        Matrix    Vms = Eigen::LoadMatrix("V_MS.mat");
-        Matrix    Tms = Eigen::LoadMatrix("T_MS.mat");
-        Tensor<4> Jms = Eigen::LoadTensor("J_MS.mat");
-        Matrix    Ems = Eigen::LoadMatrix("E_MS.mat");
-        Vector    N   = Eigen::LoadMatrix("N.mat"   );
+    MEASURE("SYSTEM AND ONE-ELECTRON INTEGRALS IN MS BASIS READING: ",
+        Matrix Vms = Eigen::LoadMatrix("V_MS.mat");
+        Matrix Tms = Eigen::LoadMatrix("T_MS.mat");
+        Vector N   = Eigen::LoadMatrix("N.mat"   );
+    )
+    MEASURE("SYSTEM AND TWO-ELECTRON INTEGRALS IN MS BASIS READING: ",
+        torch::Tensor Jms = torch::from_blob(Eigen::LoadTensor("J_MS.mat").data(), {Vms.rows(), Vms.rows(), Vms.rows(), Vms.rows()}, torch::TensorOptions().dtype(torch::kDouble)).clone();
+        torch::Tensor Ems = torch::from_blob(Eigen::LoadMatrix("E_MS.mat").data(), {Vms.rows()                                    }, torch::TensorOptions().dtype(torch::kDouble)).clone();
     )
 
     // extract the number of occupied and virtual orbitals and define the energy
-    int nocc = N(0); int nvirt = Jms.dimension(0) / 2 - nocc; int nos = 2 * nocc, nvs = 2 * nvirt; double E = 0; std::vector<double> Empn; nthread = program.get<int>("-n");
+    int nocc = N(0); int nvirt = Vms.rows() / 2 - nocc; int nos = 2 * nocc, nvs = 2 * nvirt; double E = 0; std::vector<double> Empn; nthread = program.get<int>("-n");
 
     // initialize the antisymmetrized Coulomb integrals in Physicist's notation and the Hamiltonian matrix in MS basis
-    Tensor<4> Jmsa = (Jms - Jms.shuffle(Eigen::array<int, 4>{0, 3, 2, 1})).shuffle(Eigen::array<int, 4>{0, 2, 1, 3}); Matrix Hms = Tms + Vms;
+    torch::Tensor Jmsa = (Jms - Jms.permute({0, 3, 2, 1})).permute({0, 2, 1, 3}); Matrix Hms = Tms + Vms;
 
     // calculate the HF energy
     for (int i = 0; i < 2 * nocc; i++) {
-        E += Hms(i, i); for (int j = 0; j < 2 * nocc; j++) E += 0.5 * Jmsa(i, j, i, j);
+        E += Hms(i, i); for (int j = 0; j < 2 * nocc; j++) E += 0.5 * Jmsa.index({i, j, i, j}).item().toDouble();
     }
 
     // print new line
     std::cout << std::endl;
 
     // loop over the orders of MP perturbation theory
-    for (int o = 2; o <= program.get<int>("-o"); o++) {
+    for (int i = 2; i <= program.get<int>("-o"); i++) {
 
         // start the timer and define contractions with energy
-        mpit = Timer::Now(); std::vector<std::string> contrs = split(mbptf.at(o - 2), ' '); std::vector<double> Empi(contrs.size());
+        mpit = Timer::Now(); std::vector<std::string> contributions = split(mbptf.at(i - 2), ' '); std::vector<double> Empi(contributions.size());
 
         // print the required number of contributions
-        std::printf("MP%02d ENERGY CALCULATION\n%13s %17s %12s\n", o, "CONTR", "VALUE", "TIME");
+        std::printf("MP%02d ENERGY CALCULATION\n%13s %17s %12s\n", i, "CONTR", "VALUE", "TIME");
 
         // loop over the contractions
         #ifdef _OPENMP
         #pragma omp parallel for num_threads(nthread)
         #endif
-        for (int i = 0; i < (int)contrs.size(); i++) {
-
-            // start the timer
-            tp = Timer::Now();
-
-            // parse the contraction to get slices, axes and dimensions
-            auto [Jcs, Ecs, dim] = parse(Jmsa, split(contrs.at(i), ';').at(0), o, nos, nvs);
+        for (int j = 0; j < (int)contributions.size(); j++) {
+
+            // start the timer, split the contribution and replace the dashes with commas
+            tp = Timer::Now(); std::vector<std::string> contribution = split(contributions.at(j), ';'); std::replace(contribution.at(0).begin(), contribution.at(0).end(), '-', ',');
+            
+            // define the tensor slices with axes, array of orbital energy slices and vector of ones for reshaping
+            std::vector<torch::Tensor> views; std::vector<Slice> axes(4); std::vector<torch::Tensor> epsts; std::vector<int64_t> ones = {-1};
+
+            // extract the contraction indices and split them
+            std::vector<std::string> contrinds = split(contribution.at(0), ',');
+
+            // add the coulomb contributions
+            for (int k = 0; k < i; k++) {
+                for (int l = 0; l < 4; l++) {
+                    if (std::string(OCC).find(contrinds.at(k).at(l)) != std::string::npos) axes.at(l) = Slice(None, nos);
+                    if (std::string(VRT).find(contrinds.at(k).at(l)) != std::string::npos) axes.at(l) = Slice(nos, None);
+                } views.push_back(Jmsa.index({axes.at(0), axes.at(1), axes.at(2), axes.at(3)}));
+            }
+
+            // add the energy denominators
+            for (int k = i; k < contrinds.size(); k++, epsts.clear(), ones = {-1}) {
+                for (int l = 0; l < contrinds.at(k).size(); l++) {
+                    if (std::string(OCC).find(contrinds.at(k).at(l)) != std::string::npos) {
+                        epsts.push_back(+1*Ems.index({Slice(None, nos)}).reshape(at::IntArrayRef(ones))), ones.push_back(1);
+                    }
+                }
+                for (int l = 0; l < contrinds.at(k).size(); l++) {
+                    if (std::string(VRT).find(contrinds.at(k).at(l)) != std::string::npos) {
+                        epsts.push_back(-1*Ems.index({Slice(nos, None)}).reshape(at::IntArrayRef(ones))), ones.push_back(1);
+                    }
+                }
+                views.push_back(1 / (std::accumulate(epsts.begin() + 1, epsts.end(), epsts.at(0))));
+            }
 
             // add the contribution to the energy
-            double Empii = std::stof(split(contrs.at(i), ';').at(1)) * loop(Jcs, {Ems, Ecs}, dim); Empi.at(i) = Empii;
+            double Empii = std::stod(contribution.at(1)) * torch::einsum(contribution.at(0), views).item().toDouble(); Empi.at(j) = Empii;
 
             // print the contribution
-            std::printf("%06d/%06d %17.14f %s\n", i + 1, (int)contrs.size(), Empii, Timer::Format(Timer::Elapsed(tp)).c_str());
+            std::printf("%06d/%06d %17.14f %s\n", j + 1, (int)contributions.size(), Empii, Timer::Format(Timer::Elapsed(tp)).c_str());
         }
 
         // print the MPN energy
-        Empn.push_back(std::accumulate(Empi.begin(), Empi.end(), 0.0)); std::printf("MP%02d CORRELATION ENERGY: %.16f\n", o, Empn.back());
+        Empn.push_back(std::accumulate(Empi.begin(), Empi.end(), 0.0)); std::printf("MP%02d CORRELATION ENERGY: %.16f\n", i, Empn.back());
 
         // print the MPN timing
-        std::printf("MP%02d CORRELATION ENERGY TIMING: %s\n", o, Timer::Format(Timer::Elapsed(mpit)).c_str());
+        std::printf("MP%02d CORRELATION ENERGY TIMING: %s\n", i, Timer::Format(Timer::Elapsed(mpit)).c_str());
 
         // print new line
-        if (o < program.get<int>("-o")) std::cout << std::endl;
+        if (i < program.get<int>("-o")) std::cout << std::endl;
     }
 
     // sum the MPN energies