diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-04-03 13:49:51 +0200 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-04-03 13:49:51 +0200 |
commit | 54894575bc470d9b6aeec423df6eeddec4b3f68d (patch) | |
tree | 676114cffe2b6125ec3db612c3b2688bc1cbb018 | |
parent | 0c6efee6b0f099d71ab4daa9f79715e9dc2dc902 (diff) | |
download | code-54894575bc470d9b6aeec423df6eeddec4b3f68d.tar.gz code-54894575bc470d9b6aeec423df6eeddec4b3f68d.tar.bz2 code-54894575bc470d9b6aeec423df6eeddec4b3f68d.zip |
Added support for alternate (slower) models.
-rw-r--r-- | least_squares.cpp | 190 |
1 files changed, 157 insertions, 33 deletions
diff --git a/least_squares.cpp b/least_squares.cpp index 3aa9948..2ece781 100644 --- a/least_squares.cpp +++ b/least_squares.cpp @@ -2,6 +2,7 @@ #include <getopt.h> #include <eigen3/unsupported/Eigen/CXX11/Tensor> +#include <random> #include "pcg-cpp/include/pcg_random.hpp" #include "randutils/randutils.hpp" @@ -26,6 +27,10 @@ public: const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20); return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1)); } + + Tensor operator+(const Eigen::Tensor<Real, 3>& J) const { + return Eigen::Tensor<Real, 3>::operator+(J); + } }; Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) { @@ -35,40 +40,79 @@ Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& return Eigen::Map<const Matrix>(JxT.data(), J.dimension(1), J.dimension(2)); } +class Tensor4 : public Eigen::Tensor<Real, 4> { + using Eigen::Tensor<Real, 4>::Tensor; + +public: + Eigen::Tensor<Real, 3> operator*(const Vector& x) const { + const std::array<Eigen::IndexPair<int>, 1> ip30 = {Eigen::IndexPair<int>(3, 0)}; + const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size()); + return contract(xT, ip30); + } +}; + Vector normalize(const Vector& x) { return x * sqrt((Real)x.size() / x.squaredNorm()); } -class QuadraticModel { +class Model { +public: + unsigned N; + unsigned M; + + Model(unsigned N, unsigned M) : N(N), M(M) {} + + virtual std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const { + return {0, Vector::Zero(N), Matrix::Zero(N, N)}; + } + + std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const { + auto [H, dH, ddH] = HdHddH(x); + + Vector gradH = dH - dH.dot(x) * x / (Real)N; + Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose(); + + return {H, gradH, hessH}; + } + + Real getHamiltonian(const Vector& x) const { + Real H; + std::tie(H, std::ignore, std::ignore) = HdHddH(x); + return H; + } + + Vector spectrum(const Vector& x) const { + Matrix hessH; + std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x); + Eigen::EigenSolver<Matrix> eigenS(hessH); + return eigenS.eigenvalues().real(); + } +}; + +class QuadraticModel : public Model { private: Tensor J; Matrix A; Vector b; public: - unsigned N; - unsigned M; - - template <class Generator> - QuadraticModel(unsigned N, unsigned M, Generator& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) { - std::normal_distribution<Real> distribution(0, 1); - + QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : Model(N, M), J(M, N, N), A(M, N), b(M) { for (unsigned i = 0; i < M; i++) { for (unsigned j = 0; j < N; j++) { for (unsigned k = 0; k < N; k++) { - J(i, j, k) = (2 * μ3 / N) * distribution(r); + J(i, j, k) = (2 * μ3 / N) * r.variate<Real, std::normal_distribution>(); } } } for (unsigned i = 0; i < M; i++) { for (unsigned j = 0; j < N; j++) { - A(i, j) = (μ2 / sqrt(N)) * distribution(r); + A(i, j) = (μ2 / sqrt(N)) * r.variate<Real, std::normal_distribution>(); } } for (unsigned i = 0; i < M; i++) { - b(i) = μ1 * distribution(r); + b(i) = μ1 * r.variate<Real, std::normal_distribution>(); } } @@ -80,7 +124,7 @@ public: return {b + V1, dV, J}; } - std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const { + std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override { auto [V, dV, ddV] = VdVddV(x); Real H = 0.5 * V.squaredNorm(); @@ -89,25 +133,69 @@ public: return {H, dH, ddH}; } +}; - std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const { - auto [H, dH, ddH] = HdHddH(x); +class CubicModel : public Model { +private: + Tensor4 J3; + Tensor J2; + Matrix A; + Vector b; - Vector gradH = dH - dH.dot(x) * x / (Real)N; - Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose(); +public: + CubicModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3, double μ4) : Model(N, M), J3(M, N, N, N), J2(M, N, N), A(M, N), b(M) { + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < N; j++) { + for (unsigned k = 0; k < N; k++) { + for (unsigned l = 0; l < N; l++) { + J3(i, j, k, l) = (6 * μ4 / pow(N, 1.5)) * r.variate<Real, std::normal_distribution>(); + } + } + } + } - return {H, gradH, hessH}; + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < N; j++) { + for (unsigned k = 0; k < N; k++) { + J2(i, j, k) = (2 * μ3 / N) * r.variate<Real, std::normal_distribution>(); + } + } + } + + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < N; j++) { + A(i, j) = (μ2 / sqrt(N)) * r.variate<Real, std::normal_distribution>(); + } + } + + for (unsigned i = 0; i < M; i++) { + b(i) = μ1 * r.variate<Real, std::normal_distribution>(); + } } - Vector spectrum(const Vector& x) const { - Matrix hessH; - std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x); - Eigen::EigenSolver<Matrix> eigenS(hessH); - return eigenS.eigenvalues().real(); + + std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const { + Tensor J3x = J3 * x; + Matrix J3xx = J3x * x; + Matrix J2x = J2 * x; + Vector V1 = (A + 0.5 * J2x + J3xx / 6.0) * x; + Matrix dV = A + J2x + 0.5 * J3xx; + + return {b + V1, dV, J2 + J3x}; + } + + std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override { + auto [V, dV, ddV] = VdVddV(x); + + Real H = 0.5 * V.squaredNorm(); + Vector dH = V.transpose() * dV; + Matrix ddH = V.transpose() * ddV + dV.transpose() * dV; + + return {H, dH, ddH}; } }; -Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε) { +Vector findMinimum(const Model& M, const Vector& x0, Real ε) { Vector x = x0; Real λ = 100; @@ -135,6 +223,35 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε) { return x; } +Vector metropolisStep(const Model& M, const Vector& x0, Real β, Rng& r, Real ε = 1) { + Vector Δx(M.N); + + for (Real& Δxᵢ : Δx) { + Δxᵢ = ε * r.variate<Real, std::normal_distribution>(); + } + + Vector xNew = normalize(x0 + Δx); + + Real Hold = M.getHamiltonian(x0); + Real Hnew = M.getHamiltonian(xNew); + + if (exp(-β * (Hnew - Hold)) > r.uniform<Real>(0.0, 0.1)) { + return xNew; + } else { + return x0; + } +} + +Vector metropolisSweep(const Model& M, const Vector& x0, Real β, Rng& r, Real ε = 1) { + Vector x = x0; + + for (unsigned i = 0; i < M.N; i++) { + x = metropolisStep(M, x, β, r, ε); + } + + return x; +} + int main(int argc, char* argv[]) { unsigned N = 10; Real α = 1; @@ -170,21 +287,28 @@ int main(int argc, char* argv[]) { Rng r; - QuadraticModel leastSquares(N, M, r.engine(), σ, A, J); + QuadraticModel leastSquares(N, M, r, σ, A, J); - Vector x = Vector::Zero(N); - x(0) = sqrt(N); + Vector x0 = Vector::Zero(N); + x0(0) = sqrt(N); - double energy; - std::tie(energy, std::ignore, std::ignore) = leastSquares.hamGradHess(x); + std::cout << leastSquares.getHamiltonian(x0) / N << std::endl; + + Vector xMin1 = findMinimum(leastSquares, x0, 1e-12); + + std::cout << leastSquares.getHamiltonian(xMin1) / N << std::endl; + + Vector x = x0; + + for (unsigned i = 0; i < 10; i++) { + x = metropolisSweep(leastSquares, x, 0.5, r); + } - std::cout << energy / N << std::endl; + std::cout << leastSquares.getHamiltonian(x) / N << std::endl; - Vector xMin = findMinimum(leastSquares, x, 1e-12); - std::tie(energy, std::ignore, std::ignore) = leastSquares.hamGradHess(xMin); + Vector xMin2 = findMinimum(leastSquares, x, 1e-12); - std::cout << energy / N << std::endl; - std::cout << leastSquares.spectrum(xMin)(1) / N << std::endl; + std::cout << leastSquares.getHamiltonian(xMin2) / N << std::endl; return 0; } |