From 85d168276c10a42c16283bacc61391b82b9ee399 Mon Sep 17 00:00:00 2001
From: Jaron Kent-Dobias <jaron@kent-dobias.com>
Date: Mon, 4 Jan 2021 18:22:46 +0100
Subject: A lot of uncompleted work.

---
 langevin.cpp | 242 +++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 185 insertions(+), 57 deletions(-)

diff --git a/langevin.cpp b/langevin.cpp
index 241abaf..b6f16d7 100644
--- a/langevin.cpp
+++ b/langevin.cpp
@@ -1,11 +1,15 @@
+#include <algorithm>
 #include <getopt.h>
 #include <cstdlib>
 #include <random>
 #include <complex>
 #include <array>
 #include <functional>
+#include <list>
 
 #include <eigen3/unsupported/Eigen/CXX11/Tensor>
+#include <eigen3/Eigen/Core>
+#include <eigen3/Eigen/Cholesky>
 
 #include "pcg-cpp/include/pcg_random.hpp"
 #include "randutils/randutils.hpp"
@@ -13,10 +17,14 @@
 #define P_SPIN_P 3
 const unsigned p = P_SPIN_P;
 
-using Tensor = Eigen::Tensor<std::complex<double>, p>;
-using Scalar = Eigen::Tensor<std::complex<double>, 0>;
-using Vector = Eigen::Tensor<std::complex<double>, 1>;
-using Matrix = Eigen::Tensor<std::complex<double>, 2>;
+using Scalar = std::complex<double>;
+using TensorP = Eigen::Tensor<Scalar, p>;
+using Tensor0 = Eigen::Tensor<Scalar, 0>;
+using Tensor1 = Eigen::Tensor<Scalar, 1>;
+using Tensor2 = Eigen::Tensor<Scalar, 2>;
+using Tensor3 = Eigen::Tensor<Scalar, 3>;
+using Matrix = Eigen::MatrixXcd;
+using Vector = Eigen::VectorXcd;
 
 const std::array<Eigen::IndexPair<int>, 1> ip = {Eigen::IndexPair<int>(0,0)};
 
@@ -49,11 +57,21 @@ long unsigned factorial(unsigned p) {
   }
 }
 
-Tensor generateCouplings(unsigned N, std::complex<double> κ, Rng& r) {
+/*
+void populateCouplings(TensorP& J, unsigned level, std::list<unsigned> indices, complex_normal_distribution<> dist, Rng& r) {
+  if (level == 0) {
+    Scalar x = dist(r.engine());
+    do {
+    } while (std::next_permutation(indices.begin(), indices.end()))
+  }
+}
+*/
+
+Tensor3 generateCouplings(unsigned N, std::complex<double> κ, Rng& r) {
   double σ = sqrt(factorial(p) / (2.0 * pow(N, p - 1)));
   complex_normal_distribution<> dist(0, σ, κ);
 
-  Tensor J(N, N, N);
+  TensorP J(N, N, N);
 
   for (unsigned i1 = 0; i1 < N; i1++) {
     for (unsigned i2 = i1; i2 < N; i2++) {
@@ -84,70 +102,186 @@ double norm(const Eigen::Tensor<std::complex<double>, r>& t) {
   return t2(0);
 }
 
-Vector initializeVector(unsigned N) {
+Vector initializeVector(unsigned N, Rng& r) {
   Vector z(N);
-  z.setConstant(1);
+  complex_normal_distribution<> dist(0, 1, 0);
+
+  for (unsigned i = 0; i < N; i++) {
+    z(i) = dist(r.engine());
+  }
+
+  z *= sqrt(N) / sqrt(z.dot(z));
+
   return z;
 }
 
-std::complex<double> hamiltonian(const Tensor& J, const Vector& z) {
-  Eigen::Tensor<std::complex<double>, 0> t = z.contract(z.contract(z.contract(J, ip), ip), ip);
-  return t(0) / (double)factorial(p);
+template <int r>
+Eigen::Tensor<Scalar, 3> contractDown(const Eigen::Tensor<Scalar, r>& J, const Eigen::Tensor<Scalar, 1>& z) {
+  return contractDown<r - 1>(J.contract(z, ip), z);
 }
 
-Matrix identity(unsigned N) {
-  Matrix I(N, N);
-  for (unsigned i = 0; i < N; i++) {
-    I(i, i) = 1;
+template <>
+Eigen::Tensor<Scalar, 3> contractDown(const Eigen::Tensor<Scalar, 3>& J, const Eigen::Tensor<Scalar, 1>& z) {
+  return J;
+}
+
+std::tuple<std::complex<double>, Vector, Matrix> hamGradHess(const Tensor3& J, const Vector& z) {
+  Tensor1 zT = Eigen::TensorMap<Eigen::Tensor<const std::complex<double>, 1>>(z.data(), {z.size()});
+
+  Tensor2 J1 = zT.contract(J, ip);
+  Tensor1 J2 = zT.contract(J1, ip);
+  Tensor0 J3 = zT.contract(J2, ip);
+
+  Matrix hess = ((p - 1) * (double)p / factorial(p)) * Eigen::Map<Matrix>(J1.data(), z.size(), z.size());
+  Vector grad = ((double)p / factorial(p)) * Eigen::Map<Vector>(J2.data(), z.size());
+  std::complex<double> hamiltonian = (1.0 / factorial(p)) * J3(0);
+
+  return {hamiltonian, grad, hess};
+}
+
+Vector stereographicToEuclidean(const Vector& ζ) {
+  unsigned N = ζ.size() + 1;
+  Vector z(N);
+  Scalar a = ζ.dot(ζ);
+  std::complex<double> b = 2.0 * sqrt(N) / (1.0 + a);
+
+  for (unsigned i = 0; i < N - 1; i++) {
+    z(i) = b * ζ(i);
+  }
+
+  z(N - 1) = b * (a - 1.0) / 2.0;
+
+  return z;
+}
+
+Vector euclideanToStereographic(const Vector& z) {
+  unsigned N = z.size();
+  Vector ζ(N - 1);
+  
+  for (unsigned i = 0; i < N - 1; i++) {
+    ζ(i) = z(i) / (sqrt(N) - z(N - 1));
+  }
+
+  return ζ;
+}
+
+Matrix stereographicJacobian(const Vector& ζ) {
+  unsigned N = ζ.size() + 1;
+  Matrix J(N - 1, N);
+
+  Scalar b = ζ.dot(ζ);
+
+  for (unsigned i = 0; i < N - 1; i++) {
+    for (unsigned j = 0; j < N - 1; j++) {
+      J(i, j) = - 4.0 * ζ(i) * ζ(j);
+      if (i == j) {
+        J(i, j) += 2.0 * (1.0 + b);
+      }
+      J(i, j) *= sqrt(N) / pow(1.0 + b, 2);
+    }
+
+    J(i, N - 1) = 4.0 * sqrt(N) * ζ(i) / pow(1.0 + b, 2);
   }
-  return I;
+
+  return J;
 }
 
-std::tuple<Vector, Matrix> gradHess(const Tensor& J, const Vector& z, std::complex<double> ε) {
-  Matrix J1 = z.contract(J, ip);
-  Matrix hess = ((p - 1) * (double)p / factorial(p)) * J1 - ((double)p * ε) * identity(z.size());
+Matrix stereographicMetric(const Vector& ζ) {
+  unsigned N = ζ.size();
+  Matrix g(N, N);
 
-  Vector J2 = z.contract(J1, ip);
-  Vector grad = ((double)p / factorial(p)) * J2 - ((double)p * ε) * z;
+  double a = ζ.cwiseAbs2().sum();
+  Scalar b = ζ.dot(ζ);
 
-  return {grad, hess};
+  for (unsigned i = 0; i < N; i++) {
+    for (unsigned j = 0; j < N; j++) {
+      g(i, j) = 16.0 * (std::conj(ζ(i)) * ζ(j) * (1.0 + a) - std::real(std::conj(ζ(i) * ζ(j)) * (1.0 + b)));
+      if (i == j) {
+        g(i, j) += 4.0 * std::abs(1.0 + b);
+      }
+      g(i, j) *= (N + 1) / std::norm(pow(b, 2));
+    }
+  }
+
+  return g;
 }
 
-std::tuple<double, Vector, std::complex<double>, Vector> WdW(const Tensor& J, const Vector& z, std::complex<double> ε) {
+std::tuple<std::complex<double>, Vector, Matrix> stereographicHamGradHess(const Tensor3& J, const Vector& ζ) {
   Vector grad;
   Matrix hess;
 
-  std::tie(grad, hess) = gradHess(J, z, ε);
+  Matrix jacobian = stereographicJacobian(ζ);
+  Vector z = stereographicToEuclidean(ζ);
 
-  double W = norm(grad);
-  Matrix conjHess = conj(hess);
-  Scalar zz = z.pow(2).sum();
-  Vector zc = conj(z);
+  std::complex<double> hamiltonian;
+  Vector gradZ;
+  Matrix hessZ;
+  std::tie(hamiltonian, gradZ, hessZ) = hamGradHess(J, z);
 
-  Vector dWdz = grad.contract(conjHess, ip) - (pow(p, 2) / 2.0 * ((double)z.size() - zz(0))) * zc;
-  Scalar dWdε = (-(double)p) * grad.contract(zc, ip);
-  Vector dεdz = (1 / (double)z.size()) * ((1 - 1/(double)p) * grad + ((double)p * ε) * z - (1 /(double)p) * z.contract(hess, ip));
+  Matrix g = stereographicMetric(ζ);
+  Matrix gj = g.llt().solve(jacobian);
 
-  return {W, dWdz, dWdε(0), dεdz};
+  grad = gj * gradZ;
+  hess = gj * hessZ * gj.transpose();
 
+  return {hamiltonian, grad, hess};
 }
 
-void gradientDescent(const Tensor& J, Vector& z, std::complex<double>& ε, double δ, double γ) {
+std::tuple<double, Vector, Matrix> WdW(const Tensor3& J, const Vector& z) {
+  Vector grad;
+  Matrix hess;
+
+  std::tie(std::ignore, grad, hess) = hamGradHess(J, z);
+
+  Tensor1 gradT = Eigen::TensorMap<Eigen::Tensor<const Scalar, 1>>(grad.data(), {z.size()});
+
+  Vector gtmp = grad - (grad.dot(z) / (double)z.size()) * z;
+  double W = gtmp.cwiseAbs2().sum();
+  Vector dW = (hess - (z.dot(grad) * Matrix::Identity(z.size(), z.size()) + grad * z.transpose() + z * (hess * z).transpose()) / (double)z.size()).conjugate() * gtmp;
+  Tensor2 ddWT = ((p - 2) * (p - 1) * (double)p / factorial(p)) * conj(J).contract(gradT, ip);
+  Matrix ddW = Eigen::Map<Matrix>(ddWT.data(), z.size(), z.size());
+
+  return {W, dW, ddW};
+}
+
+Vector findSaddle(const Tensor3& J, const Vector& z0, double δ, double γ) {
+  Vector z = z0;
   double W;
-  Vector dz;
-  std::complex<double> dε;
+  Vector dW;
+  Matrix ddW;
 
-  std::tie(W, dz, dε, std::ignore) = WdW(J, z, ε);
+  std::tie(W, dW, ddW) = WdW(J, z);
 
   while (W > δ * z.size()) {
-    std::cout << W << std::endl;
-    z = z - (γ / z.size()) * dz;
-    ε = ε - (γ / z.size()) * dε;
-
-    std::tie(W, dz, dε, std::ignore) = WdW(J, z, ε);
+ //   Vector dz = ddW.ldlt().solve(dW);
+    Vector dz = dW;
+    while (true) {
+      Vector newz = z - γ * dz;
+      newz *= sqrt(newz.size()) / sqrt(newz.dot(newz));
+
+      double newW;
+      Vector newdW;
+      Matrix newddW;
+      std::tie(newW, newdW, newddW) = WdW(J, newz);
+
+      if (newW < W) {
+        γ *= 1.01;
+        z = newz;
+        W = newW;
+        dW= newdW;
+        ddW = newddW;
+        break;
+      } else {
+        γ /= 0.9;
+      }
+    }
+  std::cout << W << std::endl;
   }
+
+  return z;
 }
 
+/*
 Vector generateKick(const Vector& z, Rng& r) {
   Vector k(z.size());
 
@@ -164,9 +298,8 @@ Vector generateKick(const Vector& z, Rng& r) {
 void langevin(const Tensor& J, Vector& z, std::complex<double>& ε, double T, unsigned t, double γ, Rng& r) {
   double W;
   Vector dz;
-  Vector dεdz;
 
-  std::tie(W, dz, std::ignore, dεdz) = WdW(J, z, ε);
+  std::tie(W, dz) = WdW(J, z);
 
   for (unsigned i = 0; i < t; i++) {
     std::cout << W << std::endl;
@@ -181,6 +314,7 @@ void langevin(const Tensor& J, Vector& z, std::complex<double>& ε, double T, un
     std::tie(W, dz, std::ignore, dεdz) = WdW(J, z, ε);
   }
 }
+*/
 
 int main(int argc, char* argv[]) {
   // model parameters
@@ -227,23 +361,17 @@ int main(int argc, char* argv[]) {
 
   Rng r;
 
-  Tensor J = generateCouplings(N, κ, r);
-  Vector z = initializeVector(N);
-  std::complex<double> ε = hamiltonian(J, z) / (double)N;
-
-  Scalar zz = z.pow(2).sum();
+  Tensor3 J = generateCouplings(N, κ, r);
+  Vector z = initializeVector(N, r);
 
-  std::cout << zz(0) << std::endl;
+  Vector zm = findSaddle(J, z, δ, γ);
 
-  gradientDescent(J, z, ε, δ, γ);
-  langevin(J, z, ε, T, t, γ, r);
+  std::complex<double> H;
+  Vector dH;
 
-  zz = z.pow(2).sum();
-  double a = norm(z);
+  std::tie(H, dH, std::ignore) = hamGradHess(J, zm);
 
-  std::cout << zz(0) / (double)N << std::endl;
-  std::cout << ε << std::endl;
-  std::cout << hamiltonian(J, z) / (double)N << std::endl;
-  std::cout << a / N << std::endl;
+  std::cout << zm << std::endl;
+  std::cout << dH - (H / (double)N) * zm << std::endl;
 }
 
-- 
cgit v1.2.3-70-g09d2