1 files changed, 52 insertions, 62 deletions
diff --git a/least_squares.cpp b/least_squares.cpp
index b5b3a9a..b294679 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -12,10 +12,6 @@ using Real = double;
 using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
 using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;
 
-/* Eigen tensor manipulations are quite annoying, especially with the need to convert other types
- * into tensors beforehand. Here I overload multiplication operators to allow contraction between
- * vectors and the first or last index of a tensor.
- */
 class Tensor : public Eigen::Tensor<Real, 3> {
   using Eigen::Tensor<Real, 3>::Tensor;
 
@@ -26,10 +22,6 @@ public:
     const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
     return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
   }
-
-  Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
-    return Eigen::Tensor<Real, 3>::operator+(J);
-  }
 };
 
 Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -51,11 +43,11 @@ Real HFromV(const Vector& V) {
   return 0.5 * V.squaredNorm();
 }
 
-Vector dHFromVdV(const Vector& V, const Matrix& dV) {
-  return V.transpose() * dV;
+Vector dHFromVdV(const Vector& V, const Matrix& ∂V) {
+  return V.transpose() * ∂V;
 }
 
-Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
+Vector VFromABJx(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
   return b + (A + 0.5 * Jx) * x;
 }
 
@@ -65,15 +57,35 @@ private:
   Matrix A;
   Vector b;
 
+  std::tuple<Vector, Matrix, const Tensor&> V_∂V_∂∂V(const Vector& x) const {
+    Matrix Jx = J * x;
+    Vector V = VFromABJx(b, A, Jx, x);
+    Matrix ∂V = A + Jx;
+
+    return {V, ∂V, J};
+  }
+
+  std::tuple<Real, Vector, Matrix> H_∂H_∂∂H(const Vector& x) const {
+    auto [V, ∂V, ∂∂V] = V_∂V_∂∂V(x);
+
+    Real H = HFromV(V);
+    Vector ∂H = dHFromVdV(V, ∂V);
+    Matrix ∂∂H = V.transpose() * ∂∂V + ∂V.transpose() * ∂V;
+
+    return {H, ∂H, ∂∂H};
+  }
+
 public:
   unsigned N;
   unsigned M;
-  QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
-    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
+
+  QuadraticModel(unsigned N, unsigned M, Rng& r, Real μ1, Real μ2, Real μ3) : J(M, N, N), A(M, N), b(M), N(N), M(M) {
+    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> sym23;
+
     for (unsigned k = 0; k < N; k++) {
       for (unsigned j = k; j < N; j++) {
         for (unsigned i = 0; i < M; i++) {
-          ssym1(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
+          sym23(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
         }
       }
     }
@@ -87,55 +99,33 @@ public:
     }
   }
 
-  std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
-    Matrix Jx = J * x;
-    Vector V = VFromABJ(b, A, Jx, x);
-    Matrix dV = A + Jx;
-
-    return {V, dV, J};
-  }
-
-  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
-    auto [V, dV, ddV] = VdVddV(x);
-
-    Real H = HFromV(V);
-    Vector dH = dHFromVdV(V, dV);
-    Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
-
-    return {H, dH, ddH};
+  Real getHamiltonian(const Vector& x) const {
+    return HFromV(VFromABJx(b, A, J * x, x));
   }
 
   std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
-    Vector V;
-    Matrix dV;
-    std::tie(V, dV, std::ignore) = VdVddV(x);
+    auto [V, dV, ddV] = V_∂V_∂∂V(x);
 
     Real H = HFromV(V);
-    Vector dH = makeTangent(dHFromVdV(V, dV), x);
+    Vector ∂H = dHFromVdV(V, dV);
+    Vector ∇H = makeTangent(∂H, x);
 
-    return {H, dH};
+    return {H, ∇H};
   }
 
-  std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
-    auto [H, dH, ddH] = HdHddH(x);
+  std::tuple<Real, Vector, Matrix> getHamGradHess(const Vector& x) const {
+    auto [H, ∂H, ∂∂H] = H_∂H_∂∂H(x);
 
-    Vector gradH = dH - dH.dot(x) * x / (Real)N;
-    Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N  + 2.0 * x * x.transpose();
+    Vector ∇H = makeTangent(∂H, x);
+    Matrix HessH = ∂∂H - (∂H * x.transpose() + x.dot(∂H) * Matrix::Identity(N, N)
+        + (∂∂H * x) * x.transpose()) / (Real)N  + 2.0 * x * x.transpose();
 
-    return {H, gradH, hessH};
-  }
-
-  /* Unfortunately benchmarking indicates that ignorning entries of a returned tuple doesn't result
-   * in those execution paths getting optimized out. It is much more efficient to compute the
-   * energy alone when only the energy is needed.
-   */
-  Real getHamiltonian(const Vector& x) const {
-    return HFromV(VFromABJ(b, A, J * x, x));
+    return {H, ∇H, HessH};
   }
 
   Vector spectrum(const Vector& x) const {
     Matrix hessH;
-    std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+    std::tie(std::ignore, std::ignore, hessH) = getHamGradHess(x);
     Eigen::EigenSolver<Matrix> eigenS(hessH);
     return eigenS.eigenvalues().real();
   }
@@ -145,14 +135,14 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
   Vector x = x0;
   Real λ = 10;
 
-  auto [H, g] = M.getHamGrad(x);
+  auto [H, ∇H] = M.getHamGrad(x);
 
-  while (g.norm() / M.N > ε && λ > ε) {
+  while (∇H.norm() / M.N > ε && λ > ε) {
     Real HNew;
     Vector xNew, gNew;
 
     while(
-      xNew = normalize(x + λ * g),
+      xNew = normalize(x + λ * ∇H),
       HNew = M.getHamiltonian(xNew),
       HNew < H && λ > ε
     ) {
@@ -160,7 +150,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
     }
 
     x = xNew;
-    std::tie(H, g) = M.getHamGrad(xNew);
+    std::tie(H, ∇H) = M.getHamGrad(xNew);
 
     λ *= 2;
   }
@@ -172,16 +162,16 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
   Vector x = x0;
   Real λ = 100;
 
-  auto [H, g, m] = M.hamGradHess(x0);
+  auto [H, ∇H, HessH] = M.getHamGradHess(x0);
 
   while (λ * ε < 1) {
-    Vector dx = (m - λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
-    Vector xNew = normalize(x - makeTangent(dx, x));
+    Vector Δx = (HessH - λ * (Matrix)HessH.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(∇H);
+    Vector xNew = normalize(x - makeTangent(Δx, x));
     Real HNew = M.getHamiltonian(xNew);
 
     if (HNew > H) {
       x = xNew;
-      std::tie(H, g, m) = M.hamGradHess(xNew);
+      std::tie(H, ∇H, HessH) = M.getHamGradHess(xNew);
 
       λ /= 1.5;
     } else {
@@ -198,8 +188,8 @@ Vector subagAlgorithm(const QuadraticModel& M, Rng& r, unsigned k, unsigned m) {
   σ(axis) = sqrt(M.N / k);
 
   for (unsigned i = 0; i < k; i++) {
-    auto [H, dH] = M.getHamGrad(σ);
-    Vector v = dH / dH.norm();
+    auto [H, ∇H] = M.getHamGrad(σ);
+    Vector v = ∇H / ∇H.norm();
 
     σ += sqrt(M.N/k) * v;
   }
@@ -251,11 +241,11 @@ int main(int argc, char* argv[]) {
 
   for (unsigned sample = 0; sample < samples; sample++) {
     QuadraticModel leastSquares(N, M, r, σ, A, J);
-    x = findMinimum(leastSquares, x);
-    std::cout << leastSquares.getHamiltonian(x) / N << std::flush;
+    x = gradientDescent(leastSquares, x);
+    std::cout << leastSquares.getHamiltonian(x) / N;
     QuadraticModel leastSquares2(N, M, r, σ, A, J);
     Vector σ = subagAlgorithm(leastSquares2, r, N, 15);
-    σ = findMinimum(leastSquares2, σ);
+    σ = gradientDescent(leastSquares2, σ);
     std::cout << " " << leastSquares2.getHamiltonian(σ) / N << std::endl;
   }