1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
#pragma once
#include <exception>
#include <eigen3/Eigen/LU>
#include <random>
#include "p-spin.hpp"
#include "stereographic.hpp"
class gradientDescentStallException: public std::exception {
virtual const char* what() const throw() {
return "Gradient descent stalled.";
}
};
template <class Real, class Scalar, int p>
std::tuple<Real, Vector<Scalar>> gradientDescent(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0, Real ε, Real γ0 = 1, Real δγ = 2) {
Vector<Scalar> z = z0;
Real γ = γ0;
auto [W, dW] = WdW(J, z);
while (W > ε) {
Vector<Scalar> zNew = normalize(z - γ * dW.conjugate());
auto [WNew, dWNew] = WdW(J, zNew);
if (WNew < W) { // If the step lowered the objective, accept it!
z = zNew;
W = WNew;
dW = dWNew;
γ = γ0;
} else { // Otherwise, shrink the step and try again.
γ /= δγ;
}
if (γ < 1e-50) {
throw gradientDescentStallException();
}
}
return {W, z};
}
template <class Real, class Scalar, int p>
Vector<Scalar> findSaddle(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0, Real ε, Real δW = 2, Real γ0 = 1, Real δγ = 2) {
Vector<Scalar> z = z0;
Vector<Scalar> ζ = euclideanToStereographic(z);
Real W;
std::tie(W, std::ignore) = WdW(J, z);
Vector<Scalar> dH;
Matrix<Scalar> ddH;
std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z);
while (W > ε) {
// ddH is complex symmetric, which is (almost always) invertible, so a
// partial pivot LU decomposition can be used.
Vector<Scalar> dζ = ddH.partialPivLu().solve(dH);
Vector<Scalar> ζNew = ζ - dζ;
Vector<Scalar> zNew = stereographicToEuclidean(ζNew);
Real WNew;
std::tie(WNew, std::ignore) = WdW(J, zNew);
if (WNew < W) { // If Newton's step lowered the objective, accept it!
ζ = ζNew;
z = zNew;
W = WNew;
} else { // Otherwise, do gradient descent until W is a factor δW smaller.
std::tie(W, z) = gradientDescent(J, z, W / δW, γ0, δγ);
ζ = euclideanToStereographic(z);
}
std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z);
}
return z;
}
template <class Scalar, class Distribution, class Generator>
Vector<Scalar> randomVector(unsigned N, Distribution d, Generator& r) {
Vector<Scalar> z(N);
for (unsigned i = 0; i < N; i++) {
z(i) = d(r);
}
return z;
}
template <class Real, class Scalar, int p, class Distribution, class Generator>
std::tuple<Real, Vector<Scalar>> metropolis(const Tensor<Scalar, p>& J, const Vector<Scalar>& z0,
std::function<Real(const Tensor<Scalar, p>&, const Vector<Scalar>&)>& energy,
Real T, Real γ, unsigned N, Distribution d, Generator& r) {
Vector<Scalar> z = z0;
Real E = energy(J, z);
std::uniform_real_distribution<Real> D(0, 1);
for (unsigned i = 0; i < N; i++) {
Vector<Scalar> zNew = normalize(z + γ * randomVector<Scalar>(z.size(), d, r));
Real ENew = energy(J, zNew);
if (E - ENew > T * log(D(r))) {
z = zNew;
E = ENew;
}
}
return {E, z};
}
|