Got Newton's method working well.

author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2021-01-06 17:36:28 +0100
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2021-01-06 17:36:28 +0100
commit: 752d0b61595166aafa93b48dd6c209381ac00f02 (patch)
tree: 630ab0d43eda76deec18903f256982e8d2c39eab
parent: bfa9e59782d9c814568cca2e9ed56094d04c7bc7 (diff)
download: code-752d0b61595166aafa93b48dd6c209381ac00f02.tar.gz
code-752d0b61595166aafa93b48dd6c209381ac00f02.tar.bz2
code-752d0b61595166aafa93b48dd6c209381ac00f02.zip
2 files changed, 64 insertions, 40 deletions
diff --git a/langevin.cpp b/langevin.cpp
index acea609..49efab4 100644
--- a/langevin.cpp
+++ b/langevin.cpp
@@ -8,6 +8,7 @@
 #include "randutils/randutils.hpp"
 
 #include <eigen3/Eigen/LU>
+#include <eigen3/Eigen/Dense>
 
 #include "complex_normal.hpp"
 #include "p-spin.hpp"
@@ -15,6 +16,10 @@
 
 using Rng = randutils::random_generator<pcg32>;
 
+Vector normalize(const Vector& z) {
+  return z * sqrt(z.size()) / sqrt((Scalar)(z.transpose() * z));
+}
+
 template <class Distribution, class Generator>
 Vector randomVector(unsigned N, Distribution d, Generator& r) {
   Vector z(N);
@@ -36,19 +41,18 @@ std::tuple<double, Vector> gradientDescent(const Tensor& J, const Vector& z0, do
   double γ = γ0;
 
   while (W > ε) {
-    Vector zNew = z - γ * dW.conjugate();
-    zNew *= sqrt(z.size()) / sqrt((Scalar)(zNew.transpose() * zNew));
+    Vector zNew = normalize(z - γ * dW.conjugate());
 
     double WNew;
     Vector dWNew;
     std::tie(WNew, dWNew) = WdW(J, zNew);
 
-    if (WNew < W) {
+    if (WNew < W) { // If the step lowered the objective, accept it!
       z = zNew;
       W = WNew;
       dW = dWNew;
       γ = γ0;
-    } else {
+    } else { // Otherwise, shrink the step and try again.
       γ *= 0.5;
     }
 
@@ -70,6 +74,9 @@ Vector findSaddle(const Tensor& J, const Vector& z0, double ε, double δW, doub
   Matrix ddH;
   std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ);
 
+  unsigned steps = 0;
+  unsigned gradSteps = 0;
+
   while (W > ε) {
     // ddH is complex symmetric, which is (almost always) invertible
     Vector dζ = ddH.partialPivLu().solve(dH);
@@ -85,37 +92,42 @@ Vector findSaddle(const Tensor& J, const Vector& z0, double ε, double δW, doub
       Vector z;
       std::tie(W, z) = gradientDescent(J, stereographicToEuclidean(ζ), γ0, W / δW);
       ζ = euclideanToStereographic(z);
+      gradSteps++;
     }
 
     std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ);
-    std::cout << W << std::endl;
+    steps++;
+
+    if (steps % 100 == 0) {
+      std::cerr << steps << " minimization steps, W is " << W << " " << gradSteps << "." << std::endl;
+    }
   }
 
   return stereographicToEuclidean(ζ);
 }
 
-Vector langevin(const Tensor& J, const Vector& z0, double T, double γ0,
-                std::function<bool(double, unsigned)> quit, Rng& r) {
+std::tuple<double, Vector> langevin(const Tensor& J, const Vector& z0, double T, double γ, unsigned N, Rng& r) {
   Vector z = z0;
 
   double W;
-  Vector dW;
-  std::tie(W, dW) = WdW(J, z);
+  std::tie(W, std::ignore) = WdW(J, z);
 
-  unsigned steps = 0;
-  complex_normal_distribution<> d(0, sqrt(T), 0);
+  complex_normal_distribution<> d(0, γ, 0);
 
-  while (!quit(W, steps)) {
-    double γ = pow(r.variate<double, std::normal_distribution>(0, γ0), 2);
-    Vector η = randomVector(z.size(), d, r.engine());
+  for (unsigned i = 0; i < N; i++) {
+    Vector dz = randomVector(z.size(), d, r.engine());
+    Vector zNew = normalize(z + dz);
 
-    z -= γ * (dW.conjugate() / pow((double)z.size(), 2) + η);
-    z *= sqrt(z.size()) / sqrt((Scalar)(z.transpose() * z));
+    double WNew;
+    std::tie(WNew, std::ignore) = WdW(J, zNew);
 
-    std::tie(W, dW) = WdW(J, z);
+    if (exp((W - WNew) / T) > r.uniform(0.0, 1.0)) {
+      z = zNew;
+      W = WNew;
+    }
   }
 
-  return z;
+  return {W, z};
 }
 
 int main(int argc, char* argv[]) {
@@ -130,10 +142,12 @@ int main(int argc, char* argv[]) {
   double δ = 1e-2;   // threshold for determining saddle
   double γ = 1e-2;   // step size
   unsigned t = 1000; // number of Langevin steps
+  unsigned M = 100;
+  unsigned n = 100;
 
   int opt;
 
-  while ((opt = getopt(argc, argv, "N:T:e:r:i:g:t:E:")) != -1) {
+  while ((opt = getopt(argc, argv, "N:M:n:T:e:r:i:g:t:E:")) != -1) {
     switch (opt) {
     case 'N':
       N = (unsigned)atof(optarg);
@@ -158,6 +172,12 @@ int main(int argc, char* argv[]) {
     case 'i':
       Iκ = atof(optarg);
       break;
+    case 'n':
+      n = (unsigned)atof(optarg);
+      break;
+    case 'M':
+      M = (unsigned)atof(optarg);
+      break;
     default:
       exit(1);
     }
@@ -169,24 +189,27 @@ int main(int argc, char* argv[]) {
   Rng r;
 
   Tensor J = generateCouplings<Scalar, PSPIN_P>(N, complex_normal_distribution<>(0, σ, κ), r.engine());
-  Vector z = randomVector(N, complex_normal_distribution<>(0, 1, 0), r.engine());
-  z *= sqrt(N) / sqrt((Scalar)(z.transpose() * z)); // Normalize.
+  Vector z0 = randomVector(N, complex_normal_distribution<>(0, 1, 0), r.engine());
+  z0 *= sqrt(N) / sqrt((Scalar)(z0.transpose() * z0)); // Normalize.
 
   std::function<bool(double, unsigned)> f = [δ](double W, unsigned) {
     std::cout << W << std::endl;
     return W < δ;
   };
 
-  //Vector zm = langevin(J, z, T, γ, f, r);
-  Vector zm = findSaddle(J, z, ε, δ, γ);
-
-  Scalar H;
-  Vector dH;
-
-  std::tie(H, dH, std::ignore) = hamGradHess(J, zm);
+  Vector zSaddle = findSaddle(J, z0, ε, δ, γ);
 
-  Vector constraint = dH - ((double)p * H / (double)N) * zm;
+  double W;
+  Vector z = zSaddle;
+  for (unsigned i = 0; i < n; i++) {
+    std::tie(W, z) = langevin(J, z, T, γ, M, r);
+    Vector zNewSaddle = findSaddle(J, z, ε, δ, γ);
+    Scalar H;
+    Matrix ddH;
+    std::tie(H, std::ignore, ddH) = hamGradHess(J, zNewSaddle);
+    Eigen::SelfAdjointEigenSolver<Matrix> es(ddH.adjoint() * ddH);
+    std::cout << (zNewSaddle - zSaddle).norm() << " " << real(H) << " " << imag(H) << " " << es.eigenvalues().transpose() << std::endl;
+  }
 
-  std::cout << constraint << std::endl;
-  std::cout << H / (double)N << std::endl;
+  return 0;
 }
diff --git a/stereographic.hpp b/stereographic.hpp
index 515890e..a3f2cc6 100644
--- a/stereographic.hpp
+++ b/stereographic.hpp
@@ -54,7 +54,8 @@ Matrix stereographicMetric(const Matrix& J) {
   return J * J.adjoint();
 }
 
-Eigen::Tensor<Scalar, 3> stereographicChristoffel(const Vector& ζ, const Matrix& gInvJac) {
+// Gives the Christoffel symbol, with Γ_(i1, i2)^(i3).
+Eigen::Tensor<Scalar, 3> stereographicChristoffel(const Vector& ζ, const Matrix& gInvJacConj) {
   unsigned N = ζ.size() + 1;
   Eigen::Tensor<Scalar, 3> dJ(N - 1, N - 1, N);
 
@@ -83,7 +84,7 @@ Eigen::Tensor<Scalar, 3> stereographicChristoffel(const Vector& ζ, const Matrix
 
   std::array<Eigen::IndexPair<int>, 1> ip = {Eigen::IndexPair<int>(2, 1)};
 
-  return dJ.contract(Eigen::TensorMap<Eigen::Tensor<const Scalar, 2>>(gInvJac.data(), N - 1, N), ip);
+  return dJ.contract(Eigen::TensorMap<Eigen::Tensor<const Scalar, 2>>(gInvJacConj.data(), N - 1, N), ip);
 }
 
 std::tuple<Scalar, Vector, Matrix> stereographicHamGradHess(const Tensor& J, const Vector& ζ) {
@@ -98,19 +99,19 @@ std::tuple<Scalar, Vector, Matrix> stereographicHamGradHess(const Tensor& J, con
   Matrix hessZ;
   std::tie(hamiltonian, gradZ, hessZ) = hamGradHess(J, z);
 
-  Matrix g = stereographicMetric(jacobian);
-  // g is Hermitian, which is positive definite.
-  Matrix gInvJac = g.llt().solve(jacobian);
+  Matrix metric = stereographicMetric(jacobian);
 
-  grad = gInvJac * gradZ;
+  grad = metric.llt().solve(jacobian) * gradZ;
 
-  Eigen::Tensor<Scalar, 3> Γ = stereographicChristoffel(ζ, gInvJac);
+  /* This is much slower to calculate than the marginal speedup it offers...
+  Eigen::Tensor<Scalar, 3> Γ = stereographicChristoffel(ζ, gInvJac.conjugate());
   Vector df = jacobian * gradZ;
   Eigen::Tensor<Scalar, 1> dfT = Eigen::TensorMap<Eigen::Tensor<Scalar, 1>>(df.data(), {df.size()});
-  std::array<Eigen::IndexPair<int>, 1> ip = {Eigen::IndexPair<int>(0, 0)};
+  std::array<Eigen::IndexPair<int>, 1> ip = {Eigen::IndexPair<int>(2, 0)};
   Eigen::Tensor<Scalar, 2> H2 = Γ.contract(dfT, ip);
+  */
 
-  hess = jacobian * hessZ * jacobian.transpose() - Eigen::Map<Matrix>(H2.data(), ζ.size(), ζ.size()).transpose();
+  hess = jacobian * hessZ * jacobian.transpose(); // - Eigen::Map<Matrix>(H2.data(), ζ.size(), ζ.size());
 
   return {hamiltonian, grad, hess};
 }
author	Jaron Kent-Dobias <jaron@kent-dobias.com>	2021-01-06 17:36:28 +0100
committer	Jaron Kent-Dobias <jaron@kent-dobias.com>	2021-01-06 17:36:28 +0100
commit	752d0b61595166aafa93b48dd6c209381ac00f02 (patch)
tree	630ab0d43eda76deec18903f256982e8d2c39eab
parent	bfa9e59782d9c814568cca2e9ed56094d04c7bc7 (diff)
download	code-752d0b61595166aafa93b48dd6c209381ac00f02.tar.gz code-752d0b61595166aafa93b48dd6c209381ac00f02.tar.bz2 code-752d0b61595166aafa93b48dd6c209381ac00f02.zip