diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-04-12 13:12:56 +0200 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-04-12 13:12:56 +0200 |
commit | 1526cf131ebba7213f204bec383dcf2baf60d12b (patch) | |
tree | ad034a30bab3044854d099f6111bbed667ed6f33 | |
parent | c0636131197d2d48de5b42b42411e16c55429d7d (diff) | |
download | code-1526cf131ebba7213f204bec383dcf2baf60d12b.tar.gz code-1526cf131ebba7213f204bec383dcf2baf60d12b.tar.bz2 code-1526cf131ebba7213f204bec383dcf2baf60d12b.zip |
Restored cubic model class and refactored somewhat.
-rw-r--r-- | least_squares.cpp | 176 |
1 files changed, 130 insertions, 46 deletions
diff --git a/least_squares.cpp b/least_squares.cpp index f69f117..51c79c6 100644 --- a/least_squares.cpp +++ b/least_squares.cpp @@ -26,6 +26,10 @@ public: const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20); return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1)); } + + Tensor operator+(const Eigen::Tensor<Real, 3>& J) const { + return Eigen::Tensor<Real, 3>::operator+(J); + } }; Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) { @@ -35,6 +39,17 @@ Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& return Eigen::Map<const Matrix>(JxT.data(), J.dimension(1), J.dimension(2)); } +class Tensor4 : public Eigen::Tensor<Real, 4> { + using Eigen::Tensor<Real, 4>::Tensor; + +public: + Eigen::Tensor<Real, 3> operator*(const Vector& x) const { + const std::array<Eigen::IndexPair<int>, 1> ip30 = {Eigen::IndexPair<int>(3, 0)}; + const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size()); + return contract(xT, ip30); + } +}; + Vector normalize(const Vector& x) { return x * sqrt((Real)x.size() / x.squaredNorm()); } @@ -55,16 +70,70 @@ Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector return b + (A + 0.5 * Jx) * x; } -class QuadraticModel { +class Model { +public: + unsigned N; + unsigned M; + + Model(unsigned N, unsigned M) : N(N), M(M) {} + + virtual std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const { + std::cerr << "VdVddV needs to be defined in your specialization of this class!" << std::endl; + return {Vector::Zero(M), Matrix::Zero(M, N), Tensor(M, N, N)}; + } + + std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const { + auto [V, dV, ddV] = VdVddV(x); + + Real H = HFromV(V); + Vector dH = dHFromVdV(V, dV); + Matrix ddH = V.transpose() * ddV + dV.transpose() * dV; + + return {H, dH, ddH}; + } + + std::tuple<Real, Vector> getHamGrad(const Vector& x) const { + Vector V; + Matrix dV; + std::tie(V, dV, std::ignore) = VdVddV(x); + + Real H = HFromV(V); + Vector dH = makeTangent(dHFromVdV(V, dV), x); + + return {H, dH}; + } + + std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const { + auto [H, dH, ddH] = HdHddH(x); + + Vector gradH = dH - dH.dot(x) * x / (Real)N; + Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose(); + + return {H, gradH, hessH}; + } + + virtual Real getHamiltonian(const Vector& x) const { + Real H; + std::tie(H, std::ignore, std::ignore) = HdHddH(x); + return H; + } + + Vector spectrum(const Vector& x) const { + Matrix hessH; + std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x); + Eigen::EigenSolver<Matrix> eigenS(hessH); + return eigenS.eigenvalues().real(); + } +}; + +class QuadraticModel : public Model{ private: Tensor J; Matrix A; Vector b; public: - unsigned N; - unsigned M; - QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) { + QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : Model(N, M), J(M, N, N), A(M, N), b(M) { Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1; for (unsigned k = 0; k < N; k++) { for (unsigned j = k; j < N; j++) { @@ -83,7 +152,7 @@ public: } } - std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const { + std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const override { Matrix Jx = J * x; Vector V = VFromABJ(b, A, Jx, x); Matrix dV = A + Jx; @@ -91,55 +160,69 @@ public: return {V, dV, J}; } - std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const { - auto [V, dV, ddV] = VdVddV(x); - - Real H = HFromV(V); - Vector dH = dHFromVdV(V, dV); - Matrix ddH = V.transpose() * ddV + dV.transpose() * dV; - - return {H, dH, ddH}; - } - /* Unfortunately benchmarking indicates that ignorning entries of a returned tuple doesn't result * in those execution paths getting optimized out. It is much more efficient to compute the * energy alone when only the energy is needed. */ - Real getHamiltonian(const Vector& x) const { + Real getHamiltonian(const Vector& x) const override { return HFromV(VFromABJ(b, A, J * x, x)); } +}; - std::tuple<Real, Vector> getHamGrad(const Vector& x) const { - Vector V; - Matrix dV; - std::tie(V, dV, std::ignore) = VdVddV(x); +class CubicModel : public Model { +private: + Tensor4 J3; + Tensor J2; + Matrix A; + Vector b; - Real H = HFromV(V); - Vector dH = makeTangent(dHFromVdV(V, dV), x); +public: + CubicModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3, double μ4) : Model(N, M), J3(M, N, N, N), J2(M, N, N), A(M, N), b(M) { + Eigen::StaticSGroup<Eigen::Symmetry<1,2>, Eigen::Symmetry<2,3>> ssym2; + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < N; j++) { + for (unsigned k = j; k < N; k++) { + for (unsigned l = k; l < N; l++) { + ssym2(J3, i, j, k, l) = (sqrt(6) * μ4 / pow(N, 1.5)) * r.variate<Real, std::normal_distribution>(); + } + } + } + } - return {H, dH}; - } + Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1; + for (unsigned k = 0; k < N; k++) { + for (unsigned j = k; j < N; j++) { + for (unsigned i = 0; i < M; i++) { + ssym1(J2, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2) * μ3 / N); + } + } + } - std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const { - auto [H, dH, ddH] = HdHddH(x); + for (Real& Aij : A.reshaped()) { + Aij = r.variate<Real, std::normal_distribution>(0, μ2 / sqrt(N)); + } - Real dHx = dH.dot(x) / N; + for (Real& bi : b) { + bi = r.variate<Real, std::normal_distribution>(0, μ1); + } + } - Vector gradH = dH - dHx * x; - Matrix hessH = ddH - dHx * Matrix::Identity(N, N) - ((dH + ddH * x) / N - 2 * x) * x.transpose(); + std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const override { + Tensor J3x = J3 * x; + Matrix J3xx = J3x * x; + Matrix J2x = J2 * x; + Vector V1 = (A + 0.5 * J2x + J3xx / 6.0) * x; + Matrix dV = A + J2x + 0.5 * J3xx; - return {H, gradH, hessH}; + return {b + V1, dV, J2 + J3x}; } - Vector spectrum(const Vector& x) const { - Matrix hessH; - std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x); - Eigen::EigenSolver<Matrix> eigenS(hessH); - return eigenS.eigenvalues().real(); + Real getHamiltonian(const Vector& x) const override { + return HFromV(VFromABJ(b, A, J2 * x + ((Tensor)(J3 * x) * x) / 3.0, x)); } }; -Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7) { +Vector gradientDescent(const Model& M, const Vector& x0, Real ε = 1e-7) { Vector x = x0; Real λ = 10; @@ -167,7 +250,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7 return x; } -Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) { +Vector findMinimum(const Model& M, const Vector& x0, Real ε = 1e-5) { Vector x = x0; Real λ = 100; @@ -191,21 +274,22 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) { return x; } -Vector findSaddle(const QuadraticModel& M, const Vector& x0, Real ε = 1e-12) { +Vector findSaddle(const Model& M, const Vector& x0, Real ε = 1e-12) { Vector x = x0; - Vector g; + Vector dx, g; Matrix m; - while (std::tie(std::ignore, g, m) = M.hamGradHess(x), g.norm() / M.N > ε) { - Vector dx = m.partialPivLu().solve(g); - dx -= (x.dot(dx) / M.N) * x; + while ( + std::tie(std::ignore, g, m) = M.hamGradHess(x), + dx = makeTangent(m.partialPivLu().solve(g), x), + dx.norm() > ε) { x = normalize(x - dx); } return x; } -Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real ε = 1) { +Vector metropolisSweep(const Model& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real δ = 1) { Vector x = x0; Real H = M.getHamiltonian(x); @@ -216,7 +300,7 @@ Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng& Vector xNew = x; for (Real& xNewᵢ : xNew) { - xNewᵢ += ε * r.variate<Real, std::normal_distribution>(); + xNewᵢ += δ * r.variate<Real, std::normal_distribution>(); } xNew = normalize(xNew); @@ -231,9 +315,9 @@ Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng& } if (rate / M.N < 0.5) { - ε /= 1.5; + δ /= 1.5; } else { - ε *= 1.5; + δ *= 1.5; } } |