author     Jaron Kent-Dobias <jaron@kent-dobias.com>   2024-04-12 00:45:11 +0200
committer  Jaron Kent-Dobias <jaron@kent-dobias.com>   2024-04-12 00:45:11 +0200
commit     4bdde3b2c1b6ea5e828a987211437f30587356ae (patch)
tree       4efa232bdad993bb35a1c05b157a83c6436fa140 /least_squares.cpp
parent     39b71d6cf10615dfb16cc72cf2c1bb859d9a6892 (diff)
download   code-4bdde3b2c1b6ea5e828a987211437f30587356ae.tar.gz
           code-4bdde3b2c1b6ea5e828a987211437f30587356ae.tar.bz2
           code-4bdde3b2c1b6ea5e828a987211437f30587356ae.zip
Significant refactoring, and a fix for a major error related to the symmetry of the coupling matrix and the resulting assumptions about derivatives.
Diffstat (limited to 'least_squares.cpp')
-rw-r--r--  least_squares.cpp  225
1 file changed, 106 insertions, 119 deletions
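The substance of the fix is easiest to see in isolation. Previously each entry J(i, j, k) of the coupling tensor was drawn independently, so J(i, j, k) and J(i, k, j) were unrelated, while the derivative formulas (dV = A + Jx for V = b + (A + 0.5·Jx)x) are only correct when J is symmetric in its two site indices. The diff below draws one Gaussian per unordered pair (j, k) and mirrors it with Eigen's unsupported TensorSymmetry module, adjusting the per-entry standard deviation from 2μ3/N to √2·μ3/N. A minimal self-contained sketch under stated assumptions: a plain std::mt19937 replaces the repository's randutils/pcg Rng wrapper, and the helper name fillSymmetricCouplings is invented for illustration.

// Sketch of the symmetry fix: writing through a TensorSymmetry group proxy
// assigns J(i, j, k) and J(i, k, j) from a single Gaussian draw, so only the
// j >= k triangle is visited and each independent entry is drawn exactly once.
#include <random>
#include <eigen3/unsupported/Eigen/CXX11/Tensor>
#include <eigen3/unsupported/Eigen/CXX11/TensorSymmetry>

void fillSymmetricCouplings(Eigen::Tensor<double, 3>& J, double σ, std::mt19937& rng) {
  Eigen::StaticSGroup<Eigen::Symmetry<1, 2>> sym;  // symmetric under indices 1 ↔ 2
  std::normal_distribution<double> gauss(0, σ);
  const long M = J.dimension(0), N = J.dimension(1);
  for (long k = 0; k < N; k++)
    for (long j = k; j < N; j++)       // upper triangle only
      for (long i = 0; i < M; i++)
        sym(J, i, j, k) = gauss(rng);  // sets J(i, j, k) and J(i, k, j) together
}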
diff --git a/least_squares.cpp b/least_squares.cpp
index 2ede87c..7b9c2c9 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -1,5 +1,6 @@
 #include <eigen3/Eigen/Dense>
 #include <eigen3/unsupported/Eigen/CXX11/Tensor>
+#include <eigen3/unsupported/Eigen/CXX11/TensorSymmetry>
 #include <getopt.h>
 
 #include "pcg-cpp/include/pcg_random.hpp"
@@ -7,7 +8,7 @@
 
 using Rng = randutils::random_generator<pcg32>;
 
-using Real = float;
+using Real = double;
 using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
 using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;
@@ -25,10 +26,6 @@ public:
     const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
     return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
   }
-
-  Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
-    return Eigen::Tensor<Real, 3>::operator+(J);
-  }
 };
 
 Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -38,67 +35,41 @@ Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor&
   return Eigen::Map<const Matrix>(JxT.data(), J.dimension(1), J.dimension(2));
 }
 
-class Tensor4 : public Eigen::Tensor<Real, 4> {
-  using Eigen::Tensor<Real, 4>::Tensor;
-
-public:
-  Eigen::Tensor<Real, 3> operator*(const Vector& x) const {
-    const std::array<Eigen::IndexPair<int>, 1> ip30 = {Eigen::IndexPair<int>(3, 0)};
-    const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size());
-    return contract(xT, ip30);
-  }
-};
-
 Vector normalize(const Vector& x) {
   return x * sqrt((Real)x.size() / x.squaredNorm());
 }
 
-class Model {
-public:
-  unsigned N;
-  unsigned M;
-
-  Model(unsigned N, unsigned M) : N(N), M(M) {}
-
-  virtual std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
-    return {0, Vector::Zero(N), Matrix::Zero(N, N)};
-  }
-
-  std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
-    auto [H, dH, ddH] = HdHddH(x);
-
-    Vector gradH = dH - dH.dot(x) * x / (Real)N;
-    Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+Vector makeTangent(const Vector& v, const Vector& x) {
+  return v - (v.dot(x) / x.squaredNorm()) * x;
+}
 
-    return {H, gradH, hessH};
-  }
+Real HFromV(const Vector& V) {
+  return 0.5 * V.squaredNorm();
+}
 
-  virtual Real getHamiltonian(const Vector& x) const {
-    Real H;
-    std::tie(H, std::ignore, std::ignore) = HdHddH(x);
-    return H;
-  }
+Vector dHFromVdV(const Vector& V, const Matrix& dV) {
+  return V.transpose() * dV;
+}
 
-  Vector spectrum(const Vector& x) const {
-    Matrix hessH;
-    std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
-    Eigen::EigenSolver<Matrix> eigenS(hessH);
-    return eigenS.eigenvalues().real();
-  }
-};
+Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
+  return b + (A + 0.5 * Jx) * x;
+}
 
-class QuadraticModel : public Model {
+class QuadraticModel {
 private:
   Tensor J;
   Matrix A;
   Vector b;
 
 public:
-  QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : Model(N, M), J(M, N, N), A(M, N), b(M) {
+  unsigned N;
+  unsigned M;
+  QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
+    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
     for (unsigned k = 0; k < N; k++) {
-      for (unsigned j = 0; j < N; j++) {
+      for (unsigned j = k; j < N; j++) {
         for (unsigned i = 0; i < M; i++) {
-          J(i, j, k) = r.variate<Real, std::normal_distribution>(0, 2 * μ3 / N);
+          ssym1(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2) * μ3 / N);
         }
       }
     }
@@ -114,17 +85,17 @@ public:
   std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
     Matrix Jx = J * x;
-    Vector V = b + (A + 0.5 * Jx) * x;
+    Vector V = VFromABJ(b, A, Jx, x);
     Matrix dV = A + Jx;
 
     return {V, dV, J};
   }
 
-  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override {
+  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
     auto [V, dV, ddV] = VdVddV(x);
 
-    Real H = 0.5 * V.squaredNorm();
-    Vector dH = V.transpose() * dV;
+    Real H = HFromV(V);
+    Vector dH = dHFromVdV(V, dV);
     Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
 
     return {H, dH, ddH};
@@ -134,99 +105,114 @@ public:
    * in those execution paths getting optimized out. It is much more efficient to compute the
    * energy alone when only the energy is needed.
    */
-  Real getHamiltonian(const Vector& x) const override {
-    Vector V = b + (A + 0.5 * (J * x)) * x;
-    return 0.5 * V.squaredNorm();
+  Real getHamiltonian(const Vector& x) const {
+    return HFromV(VFromABJ(b, A, J * x, x));
   }
-};
 
-class CubicModel : public Model {
-private:
-  Tensor4 J3;
-  Tensor J2;
-  Matrix A;
-  Vector b;
+  std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
+    Vector V;
+    Matrix dV;
+    std::tie(V, dV, std::ignore) = VdVddV(x);
 
-public:
-  CubicModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3, double μ4) : Model(N, M), J3(M, N, N, N), J2(M, N, N), A(M, N), b(M) {
-    for (unsigned i = 0; i < M; i++) {
-      for (unsigned j = 0; j < N; j++) {
-        for (unsigned k = 0; k < N; k++) {
-          for (unsigned l = 0; l < N; l++) {
-            J3(i, j, k, l) = (6 * μ4 / pow(N, 1.5)) * r.variate<Real, std::normal_distribution>();
-          }
-        }
-      }
-    }
+    Real H = HFromV(V);
+    Vector dH = makeTangent(dHFromVdV(V, dV), x);
 
-    for (unsigned k = 0; k < N; k++) {
-      for (unsigned j = 0; j < N; j++) {
-        for (unsigned i = 0; i < M; i++) {
-          J2(i, j, k) = r.variate<Real, std::normal_distribution>(0, 2 * μ3 / N);
-        }
-      }
-    }
+    return {H, dH};
+  }
 
-    for (Real& Aij : A.reshaped()) {
-      Aij = r.variate<Real, std::normal_distribution>(0, μ2 / sqrt(N));
-    }
+  std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
+    auto [H, dH, ddH] = HdHddH(x);
 
-    for (Real& bi : b) {
-      bi = r.variate<Real, std::normal_distribution>(0, μ1);
-    }
-  }
+    Real dHx = dH.dot(x) / N;
+    Vector gradH = dH - dHx * x;
+    Matrix hessH = ddH - dHx * Matrix::Identity(N, N) - ((dH + ddH * x) / N - 2 * x) * x.transpose();
 
-  std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const {
-    Tensor J3x = J3 * x;
-    Matrix J3xx = J3x * x;
-    Matrix J2x = J2 * x;
-    Vector V1 = (A + 0.5 * J2x + J3xx / 6.0) * x;
-    Matrix dV = A + J2x + 0.5 * J3xx;
+    return {H, gradH, hessH};
+  }
 
-    return {b + V1, dV, J2 + J3x};
+  Vector spectrum(const Vector& x) const {
+    Matrix hessH;
+    std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+    Eigen::EigenSolver<Matrix> eigenS(hessH);
+    return eigenS.eigenvalues().real();
   }
+};
 
-  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override {
-    auto [V, dV, ddV] = VdVddV(x);
+Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7) {
+  Vector x = x0;
+  Real λ = 10;
 
-    Real H = 0.5 * V.squaredNorm();
-    Vector dH = V.transpose() * dV;
-    Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
+  auto [H, g] = M.getHamGrad(x);
 
-    return {H, dH, ddH};
+  while (g.norm() / M.N > ε && λ > ε) {
+    Real HNew;
+    Vector xNew, gNew;
+
+    while(
+      xNew = normalize(x + λ * g),
+      std::tie(HNew, gNew) = M.getHamGrad(xNew),
+      HNew < H && λ > ε
+    ) {
+      λ /= 1.5;
+    }
+
+    x = xNew;
+    H = HNew;
+    g = gNew;
+
+    λ *= 2;
   }
-};
 
-Vector findMinimum(const Model& M, const Vector& x0, Real ε = 1e-12) {
+  return x;
+}
+
+Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-12) {
   Vector x = x0;
   Real λ = 100;
 
   auto [H, g, m] = M.hamGradHess(x0);
 
-  while (g.norm() / M.N > ε && λ < 1e8) {
-    Vector dx = (m + λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
-    dx -= x.dot(dx) * x / M.N;
-    Vector xNew = normalize(x - dx);
-
-    auto [HNew, gNew, mNew] = M.hamGradHess(xNew);
+  while (g.norm() / M.N > ε && λ * ε < 1) {
+    while (λ * ε < 1) {
+      Vector dx = (m - λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
+      dx -= x.dot(dx) * x / M.N;
+      Vector xNew = normalize(x - dx);
 
-    if (HNew < H) {
-      x = xNew;
-      H = HNew;
-      g = gNew;
-      m = mNew;
+      auto [HNew, gNew, mNew] = M.hamGradHess(xNew);
 
-      λ /= 2;
-    } else {
-      λ *= 1.5;
+      if (HNew > H) {
+        x = xNew;
+        H = HNew;
+        g = gNew;
+        m = mNew;
+
+        λ /= 2;
+        break;
+      } else {
+        λ *= 1.5;
+      }
     }
   }
 
   return x;
 }
 
-Vector metropolisSweep(const Model& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real ε = 1) {
+Vector findSaddle(const QuadraticModel& M, const Vector& x0, Real ε = 1e-12) {
+  Vector x = x0;
+  Vector g;
+  Matrix m;
+
+  while (std::tie(std::ignore, g, m) = M.hamGradHess(x), g.norm() / M.N > ε) {
+    Vector dx = m.partialPivLu().solve(g);
+    dx -= (x.dot(dx) / M.N) * x;
+    x = normalize(x - dx);
+  }
+
+  return x;
+}
+
+Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real ε = 1) {
  Vector x = x0;
  Real H = M.getHamiltonian(x);
 
@@ -316,6 +302,7 @@ int main(int argc, char* argv[]) {
   for (unsigned sample = 0; sample < samples; sample++) {
     QuadraticModel leastSquares(N, M, r, σ, A, J);
     x = metropolisSweep(leastSquares, x, β, r, sweeps);
+    std::cout << " " << leastSquares.getHamiltonian(x) / N;
     x = findMinimum(leastSquares, x);
     std::cout << " " << leastSquares.getHamiltonian(x) / N;
   }
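The other half of the commit message, the "assumptions about derivatives", shows up in hamGradHess and the new getHamGrad: the tangent gradient on the sphere |x|² = N is now gradH = dH − (dH·x/N)·x. A hypothetical standalone sanity check, not part of this commit, can confirm that formula by comparing against a centered finite difference of the energy along a tangent direction; Vector, Real, normalize, makeTangent, and QuadraticModel are assumed as defined in least_squares.cpp above, and the step size and tolerance are arbitrary choices.

// Hypothetical finite-difference check of the projected gradient from getHamGrad.
// Since dir is tangent to the sphere at x, normalize(x + δ·dir) = x + δ·dir + O(δ²),
// so the centered difference should match g.dot(dir) to first order.
#include <cmath>

bool checkGradient(const QuadraticModel& M, const Vector& x, const Vector& t, Real δ = 1e-6) {
  auto [H, g] = M.getHamGrad(x);
  Vector dir = makeTangent(t, x).normalized();         // unit tangent direction at x
  Real Hp = M.getHamiltonian(normalize(x + δ * dir));  // step forward, reproject to sphere
  Real Hm = M.getHamiltonian(normalize(x - δ * dir));  // step backward, reproject to sphere
  Real fd = (Hp - Hm) / (2 * δ);                       // centered finite difference
  return std::abs(fd - g.dot(dir)) < 1e-5 * (1 + std::abs(fd));
}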