summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaron Kent-Dobias <jaron@kent-dobias.com>2024-04-03 13:49:51 +0200
committerJaron Kent-Dobias <jaron@kent-dobias.com>2024-04-03 13:49:51 +0200
commit54894575bc470d9b6aeec423df6eeddec4b3f68d (patch)
tree676114cffe2b6125ec3db612c3b2688bc1cbb018
parent0c6efee6b0f099d71ab4daa9f79715e9dc2dc902 (diff)
downloadcode-54894575bc470d9b6aeec423df6eeddec4b3f68d.tar.gz
code-54894575bc470d9b6aeec423df6eeddec4b3f68d.tar.bz2
code-54894575bc470d9b6aeec423df6eeddec4b3f68d.zip
Added support for alternate (slower) models.
-rw-r--r--least_squares.cpp190
1 files changed, 157 insertions, 33 deletions
diff --git a/least_squares.cpp b/least_squares.cpp
index 3aa9948..2ece781 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -2,6 +2,7 @@
#include <getopt.h>
#include <eigen3/unsupported/Eigen/CXX11/Tensor>
+#include <random>
#include "pcg-cpp/include/pcg_random.hpp"
#include "randutils/randutils.hpp"
@@ -26,6 +27,10 @@ public:
const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
}
+
+ Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
+ return Eigen::Tensor<Real, 3>::operator+(J);
+ }
};
Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -35,40 +40,79 @@ Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor&
return Eigen::Map<const Matrix>(JxT.data(), J.dimension(1), J.dimension(2));
}
+class Tensor4 : public Eigen::Tensor<Real, 4> {
+ using Eigen::Tensor<Real, 4>::Tensor;
+
+public:
+ Eigen::Tensor<Real, 3> operator*(const Vector& x) const {
+ const std::array<Eigen::IndexPair<int>, 1> ip30 = {Eigen::IndexPair<int>(3, 0)};
+ const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size());
+ return contract(xT, ip30);
+ }
+};
+
Vector normalize(const Vector& x) {
return x * sqrt((Real)x.size() / x.squaredNorm());
}
-class QuadraticModel {
+class Model {
+public:
+ unsigned N;
+ unsigned M;
+
+ Model(unsigned N, unsigned M) : N(N), M(M) {}
+
+ virtual std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
+ return {0, Vector::Zero(N), Matrix::Zero(N, N)};
+ }
+
+ std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
+ auto [H, dH, ddH] = HdHddH(x);
+
+ Vector gradH = dH - dH.dot(x) * x / (Real)N;
+ Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+
+ return {H, gradH, hessH};
+ }
+
+ Real getHamiltonian(const Vector& x) const {
+ Real H;
+ std::tie(H, std::ignore, std::ignore) = HdHddH(x);
+ return H;
+ }
+
+ Vector spectrum(const Vector& x) const {
+ Matrix hessH;
+ std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+ Eigen::EigenSolver<Matrix> eigenS(hessH);
+ return eigenS.eigenvalues().real();
+ }
+};
+
+class QuadraticModel : public Model {
private:
Tensor J;
Matrix A;
Vector b;
public:
- unsigned N;
- unsigned M;
-
- template <class Generator>
- QuadraticModel(unsigned N, unsigned M, Generator& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
- std::normal_distribution<Real> distribution(0, 1);
-
+ QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : Model(N, M), J(M, N, N), A(M, N), b(M) {
for (unsigned i = 0; i < M; i++) {
for (unsigned j = 0; j < N; j++) {
for (unsigned k = 0; k < N; k++) {
- J(i, j, k) = (2 * μ3 / N) * distribution(r);
+ J(i, j, k) = (2 * μ3 / N) * r.variate<Real, std::normal_distribution>();
}
}
}
for (unsigned i = 0; i < M; i++) {
for (unsigned j = 0; j < N; j++) {
- A(i, j) = (μ2 / sqrt(N)) * distribution(r);
+ A(i, j) = (μ2 / sqrt(N)) * r.variate<Real, std::normal_distribution>();
}
}
for (unsigned i = 0; i < M; i++) {
- b(i) = μ1 * distribution(r);
+ b(i) = μ1 * r.variate<Real, std::normal_distribution>();
}
}
@@ -80,7 +124,7 @@ public:
return {b + V1, dV, J};
}
- std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
+ std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override {
auto [V, dV, ddV] = VdVddV(x);
Real H = 0.5 * V.squaredNorm();
@@ -89,25 +133,69 @@ public:
return {H, dH, ddH};
}
+};
- std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
- auto [H, dH, ddH] = HdHddH(x);
+class CubicModel : public Model {
+private:
+ Tensor4 J3;
+ Tensor J2;
+ Matrix A;
+ Vector b;
- Vector gradH = dH - dH.dot(x) * x / (Real)N;
- Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+public:
+ CubicModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3, double μ4) : Model(N, M), J3(M, N, N, N), J2(M, N, N), A(M, N), b(M) {
+ for (unsigned i = 0; i < M; i++) {
+ for (unsigned j = 0; j < N; j++) {
+ for (unsigned k = 0; k < N; k++) {
+ for (unsigned l = 0; l < N; l++) {
+ J3(i, j, k, l) = (6 * μ4 / pow(N, 1.5)) * r.variate<Real, std::normal_distribution>();
+ }
+ }
+ }
+ }
- return {H, gradH, hessH};
+ for (unsigned i = 0; i < M; i++) {
+ for (unsigned j = 0; j < N; j++) {
+ for (unsigned k = 0; k < N; k++) {
+ J2(i, j, k) = (2 * μ3 / N) * r.variate<Real, std::normal_distribution>();
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < M; i++) {
+ for (unsigned j = 0; j < N; j++) {
+ A(i, j) = (μ2 / sqrt(N)) * r.variate<Real, std::normal_distribution>();
+ }
+ }
+
+ for (unsigned i = 0; i < M; i++) {
+ b(i) = μ1 * r.variate<Real, std::normal_distribution>();
+ }
}
- Vector spectrum(const Vector& x) const {
- Matrix hessH;
- std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
- Eigen::EigenSolver<Matrix> eigenS(hessH);
- return eigenS.eigenvalues().real();
+
+ std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const {
+ Tensor J3x = J3 * x;
+ Matrix J3xx = J3x * x;
+ Matrix J2x = J2 * x;
+ Vector V1 = (A + 0.5 * J2x + J3xx / 6.0) * x;
+ Matrix dV = A + J2x + 0.5 * J3xx;
+
+ return {b + V1, dV, J2 + J3x};
+ }
+
+ std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const override {
+ auto [V, dV, ddV] = VdVddV(x);
+
+ Real H = 0.5 * V.squaredNorm();
+ Vector dH = V.transpose() * dV;
+ Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
+
+ return {H, dH, ddH};
}
};
-Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε) {
+Vector findMinimum(const Model& M, const Vector& x0, Real ε) {
Vector x = x0;
Real λ = 100;
@@ -135,6 +223,35 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε) {
return x;
}
+Vector metropolisStep(const Model& M, const Vector& x0, Real β, Rng& r, Real ε = 1) {
+ Vector Δx(M.N);
+
+ for (Real& Δxᵢ : Δx) {
+ Δxᵢ = ε * r.variate<Real, std::normal_distribution>();
+ }
+
+ Vector xNew = normalize(x0 + Δx);
+
+ Real Hold = M.getHamiltonian(x0);
+ Real Hnew = M.getHamiltonian(xNew);
+
+ if (exp(-β * (Hnew - Hold)) > r.uniform<Real>(0.0, 0.1)) {
+ return xNew;
+ } else {
+ return x0;
+ }
+}
+
+Vector metropolisSweep(const Model& M, const Vector& x0, Real β, Rng& r, Real ε = 1) {
+ Vector x = x0;
+
+ for (unsigned i = 0; i < M.N; i++) {
+ x = metropolisStep(M, x, β, r, ε);
+ }
+
+ return x;
+}
+
int main(int argc, char* argv[]) {
unsigned N = 10;
Real α = 1;
@@ -170,21 +287,28 @@ int main(int argc, char* argv[]) {
Rng r;
- QuadraticModel leastSquares(N, M, r.engine(), σ, A, J);
+ QuadraticModel leastSquares(N, M, r, σ, A, J);
- Vector x = Vector::Zero(N);
- x(0) = sqrt(N);
+ Vector x0 = Vector::Zero(N);
+ x0(0) = sqrt(N);
- double energy;
- std::tie(energy, std::ignore, std::ignore) = leastSquares.hamGradHess(x);
+ std::cout << leastSquares.getHamiltonian(x0) / N << std::endl;
+
+ Vector xMin1 = findMinimum(leastSquares, x0, 1e-12);
+
+ std::cout << leastSquares.getHamiltonian(xMin1) / N << std::endl;
+
+ Vector x = x0;
+
+ for (unsigned i = 0; i < 10; i++) {
+ x = metropolisSweep(leastSquares, x, 0.5, r);
+ }
- std::cout << energy / N << std::endl;
+ std::cout << leastSquares.getHamiltonian(x) / N << std::endl;
- Vector xMin = findMinimum(leastSquares, x, 1e-12);
- std::tie(energy, std::ignore, std::ignore) = leastSquares.hamGradHess(xMin);
+ Vector xMin2 = findMinimum(leastSquares, x, 1e-12);
- std::cout << energy / N << std::endl;
- std::cout << leastSquares.spectrum(xMin)(1) / N << std::endl;
+ std::cout << leastSquares.getHamiltonian(xMin2) / N << std::endl;
return 0;
}