#pragma once #include #include #include #include "p-spin.hpp" #include "stereographic.hpp" class gradientDescentStallException: public std::exception { virtual const char* what() const throw() { return "Gradient descent stalled."; } }; template std::tuple> gradientDescent(const Tensor& J, const Vector& z0, double ε, double γ0 = 1, double δγ = 2) { Vector z = z0; double γ = γ0; auto [W, dW] = WdW(J, z); while (W > ε) { Vector zNewTmp = z - γ * dW.conjugate(); Vector zNew = normalize(zNewTmp); auto [WNew, dWNew] = WdW(J, zNew); if (WNew < W) { // If the step lowered the objective, accept it! z = zNew; W = WNew; dW = dWNew; γ = γ0; } else { // Otherwise, shrink the step and try again. γ /= δγ; } if (γ < 1e-50) { throw gradientDescentStallException(); } } return {W, z}; } template Vector findSaddle(const Tensor& J, const Vector& z0, double ε, double δW = 2, double γ0 = 1, double δγ = 2) { Vector z = z0; Vector ζ = euclideanToStereographic(z); double W; std::tie(W, std::ignore) = WdW(J, z); Vector dH; Matrix ddH; std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z); while (W > ε) { // ddH is complex symmetric, which is (almost always) invertible, so a // partial pivot LU decomposition can be used. Vector dζ = ddH.partialPivLu().solve(dH); Vector ζNew = ζ - dζ; Vector zNew = stereographicToEuclidean(ζNew); double WNew; std::tie(WNew, std::ignore) = WdW(J, zNew); if (WNew < W) { // If Newton's step lowered the objective, accept it! ζ = ζNew; z = zNew; W = WNew; } else { // Otherwise, do gradient descent until W is a factor δW smaller. std::tie(W, z) = gradientDescent(J, z, W / δW, γ0, δγ); ζ = euclideanToStereographic(z); } std::tie(std::ignore, dH, ddH) = stereographicHamGradHess(J, ζ, z); } return z; } template Vector randomVector(unsigned N, Distribution d, Generator& r) { Vector z(N); for (unsigned i = 0; i < N; i++) { z(i) = d(r); } return z; } template std::tuple> metropolis(const Tensor& J, const Vector& z0, std::function&, const Vector&)>& energy, double T, double γ, unsigned N, Distribution d, Generator& r) { Vector z = z0; double E = energy(J, z); std::uniform_real_distribution D(0, 1); for (unsigned i = 0; i < N; i++) { Vector zNewTmp = z + γ * randomVector(z.size(), d, r); Vector zNew = normalize(zNewTmp); double ENew = energy(J, zNew); if (E - ENew > T * log(D(r))) { z = zNew; E = ENew; } } return {E, z}; }