path: root/least_squares.cpp
author     Jaron Kent-Dobias <jaron@kent-dobias.com>    2024-04-21 11:12:27 +0200
committer  Jaron Kent-Dobias <jaron@kent-dobias.com>    2024-04-21 11:12:27 +0200
commit     93bf3372797dcfb14c49de1b1c17a2c217cb9950 (patch)
tree       3e19e7ba035e1a580fac5d0a51a745be5040a4e6 /least_squares.cpp
parent     7d837079872369869fe1972b138f230fb13c8d91 (diff)
Did some nice refactoring.
Diffstat (limited to 'least_squares.cpp')
-rw-r--r--  least_squares.cpp  114
1 file changed, 52 insertions(+), 62 deletions(-)
diff --git a/least_squares.cpp b/least_squares.cpp
index b5b3a9a..b294679 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -12,10 +12,6 @@ using Real = double;
using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;
-/* Eigen tensor manipulations are quite annoying, especially with the need to convert other types
- * into tensors beforehand. Here I overload multiplication operators to allow contraction between
- * vectors and the first or last index of a tensor.
- */
class Tensor : public Eigen::Tensor<Real, 3> {
using Eigen::Tensor<Real, 3>::Tensor;
@@ -26,10 +22,6 @@ public:
const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
}
-
- Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
- return Eigen::Tensor<Real, 3>::operator+(J);
- }
};
Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
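Note: the comment removed in the hunk above explained why multiplication is overloaded for this Tensor class: contracting a plain vector against the first or last index of a rank-3 Eigen tensor otherwise means converting everything to tensors by hand. A minimal self-contained sketch of that contraction pattern with Eigen's unsupported Tensor module (an illustration with hypothetical free-function names, not code from this commit):

#include <Eigen/Dense>
#include <unsupported/Eigen/CXX11/Tensor>

using Real = double;
using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;

// (J x)_{ij} = J_{ijk} x_k : contract x against the last index of J.
Matrix contractLastIndex(const Eigen::Tensor<Real, 3>& J, const Vector& x) {
  const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size());
  const Eigen::array<Eigen::IndexPair<int>, 1> ip20 = {Eigen::IndexPair<int>(2, 0)};
  const Eigen::Tensor<Real, 2> JxT = J.contract(xT, ip20);
  return Eigen::Map<const Matrix>(JxT.data(), J.dimension(0), J.dimension(1));
}

// (x J)_{jk} = x_i J_{ijk} : contract x against the first index of J.
Matrix contractFirstIndex(const Vector& x, const Eigen::Tensor<Real, 3>& J) {
  const Eigen::Tensor<Real, 1> xT = Eigen::TensorMap<const Eigen::Tensor<Real, 1>>(x.data(), x.size());
  const Eigen::array<Eigen::IndexPair<int>, 1> ip00 = {Eigen::IndexPair<int>(0, 0)};
  const Eigen::Tensor<Real, 2> xJT = xT.contract(J, ip00);
  return Eigen::Map<const Matrix>(xJT.data(), J.dimension(1), J.dimension(2));
}

The Tensor class in this file wraps the same idea in operator* overloads, so J * x and a row vector times J read like ordinary matrix products.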
@@ -51,11 +43,11 @@ Real HFromV(const Vector& V) {
return 0.5 * V.squaredNorm();
}
-Vector dHFromVdV(const Vector& V, const Matrix& dV) {
- return V.transpose() * dV;
+Vector dHFromVdV(const Vector& V, const Matrix& ∂V) {
+ return V.transpose() * ∂V;
}
-Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
+Vector VFromABJx(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
return b + (A + 0.5 * Jx) * x;
}
@@ -65,15 +57,35 @@ private:
Matrix A;
Vector b;
+ std::tuple<Vector, Matrix, const Tensor&> V_∂V_∂∂V(const Vector& x) const {
+ Matrix Jx = J * x;
+ Vector V = VFromABJx(b, A, Jx, x);
+ Matrix ∂V = A + Jx;
+
+ return {V, ∂V, J};
+ }
+
+ std::tuple<Real, Vector, Matrix> H_∂H_∂∂H(const Vector& x) const {
+ auto [V, ∂V, ∂∂V] = V_∂V_∂∂V(x);
+
+ Real H = HFromV(V);
+ Vector ∂H = dHFromVdV(V, ∂V);
+ Matrix ∂∂H = V.transpose() * ∂∂V + ∂V.transpose() * ∂V;
+
+ return {H, ∂H, ∂∂H};
+ }
+
public:
unsigned N;
unsigned M;
- QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
- Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
+
+ QuadraticModel(unsigned N, unsigned M, Rng& r, Real μ1, Real μ2, Real μ3) : J(M, N, N), A(M, N), b(M), N(N), M(M) {
+ Eigen::StaticSGroup<Eigen::Symmetry<1,2>> sym23;
+
for (unsigned k = 0; k < N; k++) {
for (unsigned j = k; j < N; j++) {
for (unsigned i = 0; i < M; i++) {
- ssym1(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
+ sym23(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
}
}
}
@@ -87,55 +99,33 @@ public:
}
}
- std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
- Matrix Jx = J * x;
- Vector V = VFromABJ(b, A, Jx, x);
- Matrix dV = A + Jx;
-
- return {V, dV, J};
- }
-
- std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
- auto [V, dV, ddV] = VdVddV(x);
-
- Real H = HFromV(V);
- Vector dH = dHFromVdV(V, dV);
- Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
-
- return {H, dH, ddH};
+ Real getHamiltonian(const Vector& x) const {
+ return HFromV(VFromABJx(b, A, J * x, x));
}
std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
- Vector V;
- Matrix dV;
- std::tie(V, dV, std::ignore) = VdVddV(x);
+ auto [V, dV, ddV] = V_∂V_∂∂V(x);
Real H = HFromV(V);
- Vector dH = makeTangent(dHFromVdV(V, dV), x);
+ Vector ∂H = dHFromVdV(V, dV);
+ Vector ∇H = makeTangent(∂H, x);
- return {H, dH};
+ return {H, ∇H};
}
- std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
- auto [H, dH, ddH] = HdHddH(x);
+ std::tuple<Real, Vector, Matrix> getHamGradHess(const Vector& x) const {
+ auto [H, ∂H, ∂∂H] = H_∂H_∂∂H(x);
- Vector gradH = dH - dH.dot(x) * x / (Real)N;
- Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+ Vector ∇H = makeTangent(∂H, x);
+ Matrix HessH = ∂∂H - (∂H * x.transpose() + x.dot(∂H) * Matrix::Identity(N, N)
+ + (∂∂H * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
- return {H, gradH, hessH};
- }
-
- /* Unfortunately benchmarking indicates that ignorning entries of a returned tuple doesn't result
- * in those execution paths getting optimized out. It is much more efficient to compute the
- * energy alone when only the energy is needed.
- */
- Real getHamiltonian(const Vector& x) const {
- return HFromV(VFromABJ(b, A, J * x, x));
+ return {H, ∇H, HessH};
}
Vector spectrum(const Vector& x) const {
Matrix hessH;
- std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+ std::tie(std::ignore, std::ignore, hessH) = getHamGradHess(x);
Eigen::EigenSolver<Matrix> eigenS(hessH);
return eigenS.eigenvalues().real();
}
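Note: the comment removed in this hunk recorded a benchmarking observation: binding a returned tuple and discarding members with std::ignore does not let the discarded computations be optimized away, which is why a dedicated energy-only getHamiltonian is kept for the hot path. A toy standalone illustration of that point (written for this page, not part of the repository):

#include <cmath>
#include <iostream>
#include <tuple>

// Stand-in for an expensive derivative computation (e.g. assembling an N×N Hessian).
double expensivePart(double x) {
  double s = 0;
  for (int i = 0; i < 10000000; i++) s += std::cos(x + i);
  return s;
}

// Tuple-returning version: the expensive part runs even if the caller discards it.
std::tuple<double, double> energyAndDerivative(double x) {
  return {0.5 * x * x, expensivePart(x)};
}

// Dedicated energy-only version: nothing to discard, nothing wasted.
double energyOnly(double x) { return 0.5 * x * x; }

int main() {
  double H;
  std::tie(H, std::ignore) = energyAndDerivative(2.0); // still pays for expensivePart
  double Hcheap = energyOnly(2.0);                      // does not
  std::cout << H << " " << Hcheap << "\n";
}

In this file the expensive discarded work would be the N×N Hessian built by getHamGradHess, which is why the line searches below call getHamiltonian instead.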
@@ -145,14 +135,14 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
Vector x = x0;
Real λ = 10;
- auto [H, g] = M.getHamGrad(x);
+ auto [H, ∇H] = M.getHamGrad(x);
- while (g.norm() / M.N > ε && λ > ε) {
+ while (∇H.norm() / M.N > ε && λ > ε) {
Real HNew;
Vector xNew, gNew;
while(
- xNew = normalize(x + λ * g),
+ xNew = normalize(x + λ * ∇H),
HNew = M.getHamiltonian(xNew),
HNew < H && λ > ε
) {
@@ -160,7 +150,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
}
x = xNew;
- std::tie(H, g) = M.getHamGrad(xNew);
+ std::tie(H, ∇H) = M.getHamGrad(xNew);
λ *= 2;
}
@@ -172,16 +162,16 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
Vector x = x0;
Real λ = 100;
- auto [H, g, m] = M.hamGradHess(x0);
+ auto [H, ∇H, HessH] = M.getHamGradHess(x0);
while (λ * ε < 1) {
- Vector dx = (m - λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
- Vector xNew = normalize(x - makeTangent(dx, x));
+ Vector Δx = (HessH - λ * (Matrix)HessH.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(∇H);
+ Vector xNew = normalize(x - makeTangent(Δx, x));
Real HNew = M.getHamiltonian(xNew);
if (HNew > H) {
x = xNew;
- std::tie(H, g, m) = M.hamGradHess(xNew);
+ std::tie(H, ∇H, HessH) = M.getHamGradHess(xNew);
λ /= 1.5;
} else {
@@ -198,8 +188,8 @@ Vector subagAlgorithm(const QuadraticModel& M, Rng& r, unsigned k, unsigned m) {
σ(axis) = sqrt(M.N / k);
for (unsigned i = 0; i < k; i++) {
- auto [H, dH] = M.getHamGrad(σ);
- Vector v = dH / dH.norm();
+ auto [H, ∇H] = M.getHamGrad(σ);
+ Vector v = ∇H / ∇H.norm();
σ += sqrt(M.N/k) * v;
}
@@ -251,11 +241,11 @@ int main(int argc, char* argv[]) {
for (unsigned sample = 0; sample < samples; sample++) {
QuadraticModel leastSquares(N, M, r, σ, A, J);
- x = findMinimum(leastSquares, x);
- std::cout << leastSquares.getHamiltonian(x) / N << std::flush;
+ x = gradientDescent(leastSquares, x);
+ std::cout << leastSquares.getHamiltonian(x) / N;
QuadraticModel leastSquares2(N, M, r, σ, A, J);
Vector σ = subagAlgorithm(leastSquares2, r, N, 15);
- σ = findMinimum(leastSquares2, σ);
+ σ = gradientDescent(leastSquares2, σ);
std::cout << " " << leastSquares2.getHamiltonian(σ) / N << std::endl;
}