dynamics.hpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

#pragma once

#include <exception>
#include <eigen3/Eigen/LU>
#include <random>

#include "p-spin.hpp"
#include "stereographic.hpp"

class gradientDescentStallException: public std::exception {
  virtual const char* what() const throw() {
    return "Gradient descent stalled.";
  }
};

template <class Scalar, int p>
std::tuple<double, Vector<Scalar>> gradientDescent(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0, double ε, double γ0 = 1, double δγ = 2) {
  Vector<Scalar> z = z0;
  double γ = γ0;

  auto [W, dW] = WdW(J, z);

  while (W > ε) {
    Vector<Scalar> zNewTmp = z - γ * dW.conjugate();
    Vector<Scalar> zNew = normalize(zNewTmp);

    auto [WNew, dWNew] = WdW(J, zNew);

    if (WNew < W) { // If the step lowered the objective, accept it!
      z = zNew;
      W = WNew;
      dW = dWNew;
      γ = γ0;
    } else { // Otherwise, shrink the step and try again.
      γ /= δγ;
    }

    if (γ < 1e-50) {
      throw gradientDescentStallException();
    }
  }

  return {W, z};
}

template <class Scalar, int p>
Vector<Scalar> findSaddle(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0, double ε, double δW = 2, double γ0 = 1, double δγ = 2) {
  Vector<Scalar> z = z0;
  Vector<Scalar> ζ = euclideanToStereographic(z);

  double W;
  std::tie(W, std::ignore) = WdW(J, z);

  Vector<Scalar> dH;
  Matrix<Scalar> ddH;
  std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z);

  while (W > ε) {
    // ddH is complex symmetric, which is (almost always) invertible, so a
    // partial pivot LU decomposition can be used.
    Vector<Scalar> dζ = ddH.partialPivLu().solve(dH);
    Vector<Scalar> ζNew = ζ - dζ;
    Vector<Scalar> zNew = stereographicToEuclidean(ζNew);

    double WNew;
    std::tie(WNew, std::ignore) = WdW(J, zNew);

    if (WNew < W) { // If Newton's step lowered the objective, accept it!
      ζ = ζNew;
      z = zNew;
      W = WNew;
    } else { // Otherwise, do gradient descent until W is a factor δW smaller.
      std::tie(W, z) = gradientDescent(J, z, W / δW, γ0, δγ);
      ζ = euclideanToStereographic(z);
    }

    std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z);
  }

  return z;
}

template <class Scalar, class Distribution, class Generator>
Vector<Scalar> randomVector(unsigned N, Distribution d, Generator& r) {
  Vector<Scalar> z(N);

  for (unsigned i = 0; i < N; i++) {
    z(i) = d(r);
  }

  return z;
}

template <class Scalar, int p, class Distribution, class Generator>
std::tuple<double, Vector<Scalar>> metropolis(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0,
    std::function<double(const Tensor<Scalar, p>&, const Vector<Scalar>&)>& energy,
    double T, double γ, unsigned N, Distribution d, Generator& r) {
  Vector<Scalar> z = z0;

  double E = energy(J, z);

  std::uniform_real_distribution<double> D(0, 1);

  for (unsigned i = 0; i < N; i++) {
    Vector<Scalar> zNewTmp = z + γ * randomVector<Scalar>(z.size(), d, r);
    Vector<Scalar> zNew = normalize(zNewTmp);

    double ENew = energy(J, zNew);

    if (E - ENew > T * log(D(r))) {
      z = zNew;
      E = ENew;
    }
  }

  return {E, z};
}