-rw-r--r--  least_squares.cpp  176
1 file changed, 130 insertions, 46 deletions
diff --git a/least_squares.cpp b/least_squares.cpp
index f69f117..51c79c6 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -26,6 +26,10 @@ public:
const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
}
+
+ Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
+ return Eigen::Tensor<Real, 3>::operator+(J);
+ }
};
Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -35,6 +39,17 @@ Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor&
return Eigen::Map<const Matrix>(JxT.data(), J.dimension(1), J.dimension(2));
}
+class Tensor4 : public Eigen::Tensor<Real, 4> {
+ using Eigen::Tensor<Real, 4>::Tensor;
+
+public:
+ Eigen::Tensor<Real, 3> operator*(const Vector& x) const {
+ const std::array<Eigen::IndexPair<int>, 1> ip30 = {Eigen::IndexPair<int>(3, 0)};
+ const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size());
+ return contract(xT, ip30);
+ }
+};
+
Vector normalize(const Vector& x) {
return x * sqrt((Real)x.size() / x.squaredNorm());
}
@@ -55,16 +70,70 @@ Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector
return b + (A + 0.5 * Jx) * x;
}
-class QuadraticModel {
+class Model {
+public:
+ unsigned N;
+ unsigned M;
+
+ Model(unsigned N, unsigned M) : N(N), M(M) {}
+
+ virtual std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const {
+ std::cerr << "VdVddV needs to be defined in your specialization of this class!" << std::endl;
+ return {Vector::Zero(M), Matrix::Zero(M, N), Tensor(M, N, N)};
+ }
+
+ std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
+ auto [V, dV, ddV] = VdVddV(x);
+
+ Real H = HFromV(V);
+ Vector dH = dHFromVdV(V, dV);
+ Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
+
+ return {H, dH, ddH};
+ }
+
+ std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
+ Vector V;
+ Matrix dV;
+ std::tie(V, dV, std::ignore) = VdVddV(x);
+
+ Real H = HFromV(V);
+ Vector dH = makeTangent(dHFromVdV(V, dV), x);
+
+ return {H, dH};
+ }
+
+ std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
+ auto [H, dH, ddH] = HdHddH(x);
+
+ Vector gradH = dH - dH.dot(x) * x / (Real)N;
+ Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+
+ return {H, gradH, hessH};
+ }
+
+ virtual Real getHamiltonian(const Vector& x) const {
+ Real H;
+ std::tie(H, std::ignore, std::ignore) = HdHddH(x);
+ return H;
+ }
+
+ Vector spectrum(const Vector& x) const {
+ Matrix hessH;
+ std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+ Eigen::EigenSolver<Matrix> eigenS(hessH);
+ return eigenS.eigenvalues().real();
+ }
+};
+
+class QuadraticModel : public Model {
private:
Tensor J;
Matrix A;
Vector b;
public:
- unsigned N;
- unsigned M;
- QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
+ QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : Model(N, M), J(M, N, N), A(M, N), b(M) {
Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
for (unsigned k = 0; k < N; k++) {
for (unsigned j = k; j < N; j++) {
@@ -83,7 +152,7 @@ public:
}
}
- std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
+ std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const override {
Matrix Jx = J * x;
Vector V = VFromABJ(b, A, Jx, x);
Matrix dV = A + Jx;
@@ -91,55 +160,69 @@ public:
return {V, dV, J};
}
- std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
- auto [V, dV, ddV] = VdVddV(x);
-
- Real H = HFromV(V);
- Vector dH = dHFromVdV(V, dV);
- Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
-
- return {H, dH, ddH};
- }
-
/* Unfortunately benchmarking indicates that ignoring entries of a returned tuple doesn't result
* in those execution paths getting optimized out. It is much more efficient to compute the
* energy alone when only the energy is needed.
*/
- Real getHamiltonian(const Vector& x) const {
+ Real getHamiltonian(const Vector& x) const override {
return HFromV(VFromABJ(b, A, J * x, x));
}
+};
- std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
- Vector V;
- Matrix dV;
- std::tie(V, dV, std::ignore) = VdVddV(x);
+class CubicModel : public Model {
+private:
+ Tensor4 J3;
+ Tensor J2;
+ Matrix A;
+ Vector b;
- Real H = HFromV(V);
- Vector dH = makeTangent(dHFromVdV(V, dV), x);
+public:
+ CubicModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3, double μ4) : Model(N, M), J3(M, N, N, N), J2(M, N, N), A(M, N), b(M) {
+ Eigen::StaticSGroup<Eigen::Symmetry<1,2>, Eigen::Symmetry<2,3>> ssym2;
+ for (unsigned i = 0; i < M; i++) {
+ for (unsigned j = 0; j < N; j++) {
+ for (unsigned k = j; k < N; k++) {
+ for (unsigned l = k; l < N; l++) {
+ ssym2(J3, i, j, k, l) = (sqrt(6) * μ4 / pow(N, 1.5)) * r.variate<Real, std::normal_distribution>();
+ }
+ }
+ }
+ }
- return {H, dH};
- }
+ Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
+ for (unsigned k = 0; k < N; k++) {
+ for (unsigned j = k; j < N; j++) {
+ for (unsigned i = 0; i < M; i++) {
+ ssym1(J2, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2) * μ3 / N);
+ }
+ }
+ }
- std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
- auto [H, dH, ddH] = HdHddH(x);
+ for (Real& Aij : A.reshaped()) {
+ Aij = r.variate<Real, std::normal_distribution>(0, μ2 / sqrt(N));
+ }
- Real dHx = dH.dot(x) / N;
+ for (Real& bi : b) {
+ bi = r.variate<Real, std::normal_distribution>(0, μ1);
+ }
+ }
- Vector gradH = dH - dHx * x;
- Matrix hessH = ddH - dHx * Matrix::Identity(N, N) - ((dH + ddH * x) / N - 2 * x) * x.transpose();
+ std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const override {
+ Tensor J3x = J3 * x;
+ Matrix J3xx = J3x * x;
+ Matrix J2x = J2 * x;
+ Vector V1 = (A + 0.5 * J2x + J3xx / 6.0) * x;
+ Matrix dV = A + J2x + 0.5 * J3xx;
- return {H, gradH, hessH};
+ return {b + V1, dV, J2 + J3x};
}
- Vector spectrum(const Vector& x) const {
- Matrix hessH;
- std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
- Eigen::EigenSolver<Matrix> eigenS(hessH);
- return eigenS.eigenvalues().real();
+ Real getHamiltonian(const Vector& x) const override {
+ return HFromV(VFromABJ(b, A, J2 * x + ((Tensor)(J3 * x) * x) / 3.0, x));
}
};
-Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7) {
+Vector gradientDescent(const Model& M, const Vector& x0, Real ε = 1e-7) {
Vector x = x0;
Real λ = 10;
@@ -167,7 +250,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
return x;
}
-Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
+Vector findMinimum(const Model& M, const Vector& x0, Real ε = 1e-5) {
Vector x = x0;
Real λ = 100;
@@ -191,21 +274,22 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
return x;
}
-Vector findSaddle(const QuadraticModel& M, const Vector& x0, Real ε = 1e-12) {
+Vector findSaddle(const Model& M, const Vector& x0, Real ε = 1e-12) {
Vector x = x0;
- Vector g;
+ Vector dx, g;
Matrix m;
- while (std::tie(std::ignore, g, m) = M.hamGradHess(x), g.norm() / M.N > ε) {
- Vector dx = m.partialPivLu().solve(g);
- dx -= (x.dot(dx) / M.N) * x;
+ while (
+ std::tie(std::ignore, g, m) = M.hamGradHess(x),
+ dx = makeTangent(m.partialPivLu().solve(g), x),
+ dx.norm() > ε) {
x = normalize(x - dx);
}
return x;
}
-Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real ε = 1) {
+Vector metropolisSweep(const Model& M, const Vector& x0, Real β, Rng& r, unsigned sweeps = 1, Real δ = 1) {
Vector x = x0;
Real H = M.getHamiltonian(x);
@@ -216,7 +300,7 @@ Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng&
Vector xNew = x;
for (Real& xNewᵢ : xNew) {
- xNewᵢ += ε * r.variate<Real, std::normal_distribution>();
+ xNewᵢ += δ * r.variate<Real, std::normal_distribution>();
}
xNew = normalize(xNew);
@@ -231,9 +315,9 @@ Vector metropolisSweep(const QuadraticModel& M, const Vector& x0, Real β, Rng&
}
if (rate / M.N < 0.5) {
- ε /= 1.5;
+ δ /= 1.5;
} else {
- ε *= 1.5;
+ δ *= 1.5;
}
}
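
Usage note on the refactor above: a specialization of the new Model base class only needs to override VdVddV (and, if it wants the fast path, getHamiltonian) for the generic routines gradientDescent, findMinimum, findSaddle, and metropolisSweep to accept it. The following is a minimal sketch, not part of this commit, of such a specialization with a purely linear V(x) = b + Ax; it assumes the Real, Vector, Matrix, Tensor, and Rng aliases and the helpers defined in least_squares.cpp, assumes Rng is default-constructible, and the main() shown is purely illustrative.

// Sketch only: a Model specialization with linear V(x) = b + A x.
// Assumes the typedefs (Real, Vector, Matrix, Tensor, Rng) and helpers
// (normalize, findMinimum, HFromV, dHFromVdV) from least_squares.cpp.
class LinearModel : public Model {
private:
  Matrix A;
  Vector b;

public:
  LinearModel(unsigned N, unsigned M, Rng& r) : Model(N, M), A(M, N), b(M) {
    for (Real& Aij : A.reshaped()) {
      Aij = r.variate<Real, std::normal_distribution>(0, 1 / sqrt(N));
    }
    for (Real& bi : b) {
      bi = r.variate<Real, std::normal_distribution>();
    }
  }

  // V = b + A x, dV = A, ddV = 0: with a zero third-derivative tensor the
  // generic HdHddH reduces to ddH = dVᵀ dV.
  std::tuple<Vector, Matrix, Tensor> VdVddV(const Vector& x) const override {
    Tensor ddV(M, N, N);
    ddV.setZero();
    return {b + A * x, A, ddV};
  }
};

int main() {
  Rng r;
  LinearModel model(100, 100, r);
  Vector x0 = normalize(Vector::Random(100)); // random point on the sphere |x|² = N
  Vector xMin = findMinimum(model, x0);
  std::cout << model.getHamiltonian(xMin) / model.N << std::endl;
}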