-rw-r--r--  compile_flags.txt |   3 +++
-rw-r--r--  least_squares.cpp | 114 ++++++++++++++----------------
2 files changed, 55 insertions, 62 deletions
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..b5502dc
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,3 @@
+-std=c++2b
+-Wno-mathematical-notation-identifier-extension
+-Wall
diff --git a/least_squares.cpp b/least_squares.cpp
index b5b3a9a..b294679 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -12,10 +12,6 @@ using Real = double;
 using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
 using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;
 
-/* Eigen tensor manipulations are quite annoying, especially with the need to convert other types
- * into tensors beforehand. Here I overload multiplication operators to allow contraction between
- * vectors and the first or last index of a tensor.
- */
 class Tensor : public Eigen::Tensor<Real, 3> {
   using Eigen::Tensor<Real, 3>::Tensor;
 
@@ -26,10 +22,6 @@ public:
     const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
     return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
   }
-
-  Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
-    return Eigen::Tensor<Real, 3>::operator+(J);
-  }
 };
 
 Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -51,11 +43,11 @@ Real HFromV(const Vector& V) {
   return 0.5 * V.squaredNorm();
 }
 
-Vector dHFromVdV(const Vector& V, const Matrix& dV) {
-  return V.transpose() * dV;
+Vector dHFromVdV(const Vector& V, const Matrix& ∂V) {
+  return V.transpose() * ∂V;
 }
 
-Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
+Vector VFromABJx(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
   return b + (A + 0.5 * Jx) * x;
 }
 
@@ -65,15 +57,35 @@ private:
   Matrix A;
   Vector b;
 
+  std::tuple<Vector, Matrix, const Tensor&> V_∂V_∂∂V(const Vector& x) const {
+    Matrix Jx = J * x;
+    Vector V = VFromABJx(b, A, Jx, x);
+    Matrix ∂V = A + Jx;
+
+    return {V, ∂V, J};
+  }
+
+  std::tuple<Real, Vector, Matrix> H_∂H_∂∂H(const Vector& x) const {
+    auto [V, ∂V, ∂∂V] = V_∂V_∂∂V(x);
+
+    Real H = HFromV(V);
+    Vector ∂H = dHFromVdV(V, ∂V);
+    Matrix ∂∂H = V.transpose() * ∂∂V + ∂V.transpose() * ∂V;
+
+    return {H, ∂H, ∂∂H};
+  }
+
 public:
   unsigned N;
   unsigned M;
-  QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
-    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
+
+  QuadraticModel(unsigned N, unsigned M, Rng& r, Real μ1, Real μ2, Real μ3) : J(M, N, N), A(M, N), b(M), N(N), M(M) {
+    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> sym23;
+
     for (unsigned k = 0; k < N; k++) {
       for (unsigned j = k; j < N; j++) {
         for (unsigned i = 0; i < M; i++) {
-          ssym1(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
+          sym23(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
         }
       }
     }
@@ -87,55 +99,33 @@ public:
     }
   }
 
-  std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
-    Matrix Jx = J * x;
-    Vector V = VFromABJ(b, A, Jx, x);
-    Matrix dV = A + Jx;
-
-    return {V, dV, J};
-  }
-
-  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
-    auto [V, dV, ddV] = VdVddV(x);
-
-    Real H = HFromV(V);
-    Vector dH = dHFromVdV(V, dV);
-    Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
-
-    return {H, dH, ddH};
+  Real getHamiltonian(const Vector& x) const {
+    return HFromV(VFromABJx(b, A, J * x, x));
   }
 
   std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
-    Vector V;
-    Matrix dV;
-    std::tie(V, dV, std::ignore) = VdVddV(x);
+    auto [V, dV, ddV] = V_∂V_∂∂V(x);
 
     Real H = HFromV(V);
-    Vector dH = makeTangent(dHFromVdV(V, dV), x);
+    Vector ∂H = dHFromVdV(V, dV);
+    Vector ∇H = makeTangent(∂H, x);
 
-    return {H, dH};
+    return {H, ∇H};
   }
 
-  std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
-    auto [H, dH, ddH] = HdHddH(x);
+  std::tuple<Real, Vector, Matrix> getHamGradHess(const Vector& x) const {
+    auto [H, ∂H, ∂∂H] = H_∂H_∂∂H(x);
 
-    Vector gradH = dH - dH.dot(x) * x / (Real)N;
-    Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
+    Vector ∇H = makeTangent(∂H, x);
+    Matrix HessH = ∂∂H - (∂H * x.transpose() + x.dot(∂H) * Matrix::Identity(N, N)
+        + (∂∂H * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
 
-    return {H, gradH, hessH};
-  }
-
-  /* Unfortunately benchmarking indicates that ignorning entries of a returned tuple doesn't result
-   * in those execution paths getting optimized out. It is much more efficient to compute the
-   * energy alone when only the energy is needed.
-   */
-  Real getHamiltonian(const Vector& x) const {
-    return HFromV(VFromABJ(b, A, J * x, x));
+    return {H, ∇H, HessH};
  }
 
   Vector spectrum(const Vector& x) const {
     Matrix hessH;
-    std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+    std::tie(std::ignore, std::ignore, hessH) = getHamGradHess(x);
     Eigen::EigenSolver<Matrix> eigenS(hessH);
     return eigenS.eigenvalues().real();
   }
@@ -145,14 +135,14 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
   Vector x = x0;
   Real λ = 10;
 
-  auto [H, g] = M.getHamGrad(x);
+  auto [H, ∇H] = M.getHamGrad(x);
 
-  while (g.norm() / M.N > ε && λ > ε) {
+  while (∇H.norm() / M.N > ε && λ > ε) {
     Real HNew;
     Vector xNew, gNew;
 
     while(
-      xNew = normalize(x + λ * g),
+      xNew = normalize(x + λ * ∇H),
       HNew = M.getHamiltonian(xNew),
       HNew < H && λ > ε
     ) {
@@ -160,7 +150,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
     }
 
     x = xNew;
-    std::tie(H, g) = M.getHamGrad(xNew);
+    std::tie(H, ∇H) = M.getHamGrad(xNew);
     λ *= 2;
   }
 
@@ -172,16 +162,16 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
   Vector x = x0;
   Real λ = 100;
 
-  auto [H, g, m] = M.hamGradHess(x0);
+  auto [H, ∇H, HessH] = M.getHamGradHess(x0);
 
   while (λ * ε < 1) {
-    Vector dx = (m - λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
-    Vector xNew = normalize(x - makeTangent(dx, x));
+    Vector Δx = (HessH - λ * (Matrix)HessH.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(∇H);
+    Vector xNew = normalize(x - makeTangent(Δx, x));
     Real HNew = M.getHamiltonian(xNew);
 
     if (HNew > H) {
       x = xNew;
-      std::tie(H, g, m) = M.hamGradHess(xNew);
+      std::tie(H, ∇H, HessH) = M.getHamGradHess(xNew);
 
       λ /= 1.5;
     } else {
@@ -198,8 +188,8 @@ Vector subagAlgorithm(const QuadraticModel& M, Rng& r, unsigned k, unsigned m) {
   σ(axis) = sqrt(M.N / k);
 
   for (unsigned i = 0; i < k; i++) {
-    auto [H, dH] = M.getHamGrad(σ);
-    Vector v = dH / dH.norm();
+    auto [H, ∇H] = M.getHamGrad(σ);
+    Vector v = ∇H / ∇H.norm();
 
     σ += sqrt(M.N/k) * v;
   }
@@ -251,11 +241,11 @@ int main(int argc, char* argv[]) {
   for (unsigned sample = 0; sample < samples; sample++) {
     QuadraticModel leastSquares(N, M, r, σ, A, J);
 
-    x = findMinimum(leastSquares, x);
-    std::cout << leastSquares.getHamiltonian(x) / N << std::flush;
+    x = gradientDescent(leastSquares, x);
+    std::cout << leastSquares.getHamiltonian(x) / N;
 
     QuadraticModel leastSquares2(N, M, r, σ, A, J);
     Vector σ = subagAlgorithm(leastSquares2, r, N, 15);
-    σ = findMinimum(leastSquares2, σ);
+    σ = gradientDescent(leastSquares2, σ);
    std::cout << " " << leastSquares2.getHamiltonian(σ) / N << std::endl;
  }
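
Notation note: after this patch, ∂ names ambient (unconstrained) derivatives and ∇ names their restriction to the sphere, whose radius √N is suggested by the x/N projection. The two projections computed in getHamGradHess read, in LaTeX:

    \nabla H = \partial H - \frac{(\partial H \cdot x)}{N}\, x,
    \qquad
    \operatorname{Hess} H = \partial\partial H
      - \frac{\partial H\, x^{\mathsf T} + (x \cdot \partial H)\, I + (\partial\partial H\, x)\, x^{\mathsf T}}{N}
      + 2\, x x^{\mathsf T}

matching the ∇H = makeTangent(∂H, x) line and the HessH expression in the patch.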
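makeTangent and normalize are called in this file but defined elsewhere in the project. For readers of the patch, here is a minimal sketch consistent with their call sites; the bodies below are inferred, not the project's actual definitions:

    #include <cmath>
    #include <eigen3/Eigen/Dense>

    using Real = double;
    using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;

    // Inferred: reproduces the projection ∂H - ∂H.dot(x) * x / N that
    // getHamGradHess previously spelled out inline.
    Vector makeTangent(const Vector& v, const Vector& x) {
      return v - (v.dot(x) / (Real)x.size()) * x;
    }

    // Assumed: rescales x onto the sphere |x|² = N used throughout this file.
    Vector normalize(const Vector& x) {
      return std::sqrt((Real)x.size()) * x / x.norm();
    }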
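For reference, the step findMinimum solves for is a Levenberg-Marquardt-style damped Newton update, in the patch's new notation:

    \Delta x = \bigl(\operatorname{Hess} H - \lambda\, \operatorname{diag}\lvert(\operatorname{Hess} H)_{ii}\rvert\bigr)^{-1} \nabla H,
    \qquad
    x_{\text{new}} = \operatorname{normalize}\bigl(x - \operatorname{makeTangent}(\Delta x, x)\bigr)

with the damping λ relaxed (λ /= 1.5) each time a step is accepted.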
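subagAlgorithm, untouched here apart from the renamed gradient, builds its configuration by k steps of fixed length along the normalized tangential gradient:

    \sigma_{i+1} = \sigma_i + \sqrt{N/k}\; \frac{\nabla H(\sigma_i)}{\lVert \nabla H(\sigma_i) \rVert}

If each increment were exactly orthogonal to σ, k steps of squared length N/k would leave |σ|² ≈ N, i.e. near the sphere the other routines walk on.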