-rw-r--r--  compile_flags.txt  |   3
-rw-r--r--  least_squares.cpp  | 114
2 files changed, 55 insertions, 62 deletions
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..b5502dc
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,3 @@
+-std=c++2b
+-Wno-mathematical-notation-identifier-extension
+-Wall
diff --git a/least_squares.cpp b/least_squares.cpp
index b5b3a9a..b294679 100644
--- a/least_squares.cpp
+++ b/least_squares.cpp
@@ -12,10 +12,6 @@ using Real = double;
 using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
 using Matrix = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>;
 
-/* Eigen tensor manipulations are quite annoying, especially with the need to convert other types
- * into tensors beforehand. Here I overload multiplication operators to allow contraction between
- * vectors and the first or last index of a tensor.
- */
 class Tensor : public Eigen::Tensor<Real, 3> {
   using Eigen::Tensor<Real, 3>::Tensor;
 
@@ -26,10 +22,6 @@ public:
     const Eigen::Tensor<Real, 2> JxT = contract(xT, ip20);
     return Eigen::Map<const Matrix>(JxT.data(), dimension(0), dimension(1));
   }
-
-  Tensor operator+(const Eigen::Tensor<Real, 3>& J) const {
-    return Eigen::Tensor<Real, 3>::operator+(J);
-  }
 };
 
 Matrix operator*(const Eigen::Matrix<Real, 1, Eigen::Dynamic>& x, const Tensor& J) {
@@ -51,11 +43,11 @@ Real HFromV(const Vector& V) {
   return 0.5 * V.squaredNorm();
 }
 
-Vector dHFromVdV(const Vector& V, const Matrix& dV) {
-  return V.transpose() * dV;
+Vector dHFromVdV(const Vector& V, const Matrix& ∂V) {
+  return V.transpose() * ∂V;
 }
 
-Vector VFromABJ(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
+Vector VFromABJx(const Vector& b, const Matrix& A, const Matrix& Jx, const Vector& x) {
   return b + (A + 0.5 * Jx) * x;
 }
 
@@ -65,15 +57,35 @@ private:
   Matrix A;
   Vector b;
 
+  std::tuple<Vector, Matrix, const Tensor&> V_∂V_∂∂V(const Vector& x) const {
+    Matrix Jx = J * x;
+    Vector V = VFromABJx(b, A, Jx, x);
+    Matrix ∂V = A + Jx;
+
+    return {V, ∂V, J};
+  }
+
+  std::tuple<Real, Vector, Matrix> H_∂H_∂∂H(const Vector& x) const {
+    auto [V, ∂V, ∂∂V] = V_∂V_∂∂V(x);
+
+    Real H = HFromV(V);
+    Vector ∂H = dHFromVdV(V, ∂V);
+    Matrix ∂∂H = V.transpose() * ∂∂V + ∂V.transpose() * ∂V;
+
+    return {H, ∂H, ∂∂H};
+  }
+
 public:
   unsigned N;
   unsigned M;
-  QuadraticModel(unsigned N, unsigned M, Rng& r, double μ1, double μ2, double μ3) : N(N), M(M), J(M, N, N), A(M, N), b(M) {
-    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> ssym1;
+
+  QuadraticModel(unsigned N, unsigned M, Rng& r, Real μ1, Real μ2, Real μ3) : J(M, N, N), A(M, N), b(M), N(N), M(M) {
+    Eigen::StaticSGroup<Eigen::Symmetry<1,2>> sym23;
+
     for (unsigned k = 0; k < N; k++) {
       for (unsigned j = k; j < N; j++) {
         for (unsigned i = 0; i < M; i++) {
-          ssym1(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
+          sym23(J, i, j, k) = r.variate<Real, std::normal_distribution>(0, sqrt(2 * μ3) / N);
         }
       }
     }
@@ -87,55 +99,33 @@ public:
     }
   }
 
-  std::tuple<Vector, Matrix, const Tensor&> VdVddV(const Vector& x) const {
-    Matrix Jx = J * x;
-    Vector V = VFromABJ(b, A, Jx, x);
-    Matrix dV = A + Jx;
-
-    return {V, dV, J};
-  }
-
-  std::tuple<Real, Vector, Matrix> HdHddH(const Vector& x) const {
-    auto [V, dV, ddV] = VdVddV(x);
-
-    Real H = HFromV(V);
-    Vector dH = dHFromVdV(V, dV);
-    Matrix ddH = V.transpose() * ddV + dV.transpose() * dV;
-
-    return {H, dH, ddH};
+  Real getHamiltonian(const Vector& x) const {
+    return HFromV(VFromABJx(b, A, J * x, x));
   }
 
   std::tuple<Real, Vector> getHamGrad(const Vector& x) const {
-    Vector V;
-    Matrix dV;
-    std::tie(V, dV, std::ignore) = VdVddV(x);
+    auto [V, dV, ddV] = V_∂V_∂∂V(x);
 
     Real H = HFromV(V);
-    Vector dH = makeTangent(dHFromVdV(V, dV), x);
+    Vector ∂H = dHFromVdV(V, dV);
+    Vector ∇H = makeTangent(∂H, x);
 
-    return {H, dH};
+    return {H, ∇H};
   }
 
-  std::tuple<Real, Vector, Matrix> hamGradHess(const Vector& x) const {
-    auto [H, dH, ddH] = HdHddH(x);
+  std::tuple<Real, Vector, Matrix> getHamGradHess(const Vector& x) const {
+    auto [H, ∂H, ∂∂H] = H_∂H_∂∂H(x);
 
-    Vector gradH = dH - dH.dot(x) * x / (Real)N;
-    Matrix hessH = ddH - (dH * x.transpose() + x.dot(dH) * Matrix::Identity(N, N) + (ddH * x) * x.transpose()) / (Real)N  + 2.0 * x * x.transpose();
+    Vector ∇H = makeTangent(∂H, x);
+    Matrix HessH = ∂∂H - (∂H * x.transpose() + x.dot(∂H) * Matrix::Identity(N, N)
+        + (∂∂H * x) * x.transpose()) / (Real)N + 2.0 * x * x.transpose();
 
-    return {H, gradH, hessH};
-  }
-
-  /* Unfortunately benchmarking indicates that ignorning entries of a returned tuple doesn't result
-   * in those execution paths getting optimized out. It is much more efficient to compute the
-   * energy alone when only the energy is needed.
-   */
-  Real getHamiltonian(const Vector& x) const {
-    return HFromV(VFromABJ(b, A, J * x, x));
+    return {H, ∇H, HessH};
   }
 
   Vector spectrum(const Vector& x) const {
     Matrix hessH;
-    std::tie(std::ignore, std::ignore, hessH) = hamGradHess(x);
+    std::tie(std::ignore, std::ignore, hessH) = getHamGradHess(x);
     Eigen::EigenSolver<Matrix> eigenS(hessH);
     return eigenS.eigenvalues().real();
   }
@@ -145,14 +135,14 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
   Vector x = x0;
   Real λ = 10;
 
-  auto [H, g] = M.getHamGrad(x);
+  auto [H, ∇H] = M.getHamGrad(x);
 
-  while (g.norm() / M.N > ε && λ > ε) {
+  while (∇H.norm() / M.N > ε && λ > ε) {
     Real HNew;
     Vector xNew, gNew;
 
     while(
-      xNew = normalize(x + λ * g),
+      xNew = normalize(x + λ * ∇H),
       HNew = M.getHamiltonian(xNew),
       HNew < H && λ > ε
     ) {
@@ -160,7 +150,7 @@ Vector gradientDescent(const QuadraticModel& M, const Vector& x0, Real ε = 1e-7
     }
 
     x = xNew;
-    std::tie(H, g) = M.getHamGrad(xNew);
+    std::tie(H, ∇H) = M.getHamGrad(xNew);
     λ *= 2;
   }
 
@@ -172,16 +162,16 @@ Vector findMinimum(const QuadraticModel& M, const Vector& x0, Real ε = 1e-5) {
   Vector x = x0;
   Real λ = 100;
 
-  auto [H, g, m] = M.hamGradHess(x0);
+  auto [H, ∇H, HessH] = M.getHamGradHess(x0);
 
   while (λ * ε < 1) {
-    Vector dx = (m - λ * (Matrix)m.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(g);
-    Vector xNew = normalize(x - makeTangent(dx, x));
+    Vector Δx = (HessH - λ * (Matrix)HessH.diagonal().cwiseAbs().asDiagonal()).partialPivLu().solve(∇H);
+    Vector xNew = normalize(x - makeTangent(Δx, x));
     Real HNew = M.getHamiltonian(xNew);
 
     if (HNew > H) {
       x = xNew;
-      std::tie(H, g, m) = M.hamGradHess(xNew);
+      std::tie(H, ∇H, HessH) = M.getHamGradHess(xNew);
 
       λ /= 1.5;
     } else {
@@ -198,8 +188,8 @@ Vector subagAlgorithm(const QuadraticModel& M, Rng& r, unsigned k, unsigned m) {
   σ(axis) = sqrt(M.N / k);
 
   for (unsigned i = 0; i < k; i++) {
-    auto [H, dH] = M.getHamGrad(σ);
-    Vector v = dH / dH.norm();
+    auto [H, ∇H] = M.getHamGrad(σ);
+    Vector v = ∇H / ∇H.norm();
 
     σ += sqrt(M.N/k) * v;
   }
@@ -251,11 +241,11 @@ int main(int argc, char* argv[]) {
 
   for (unsigned sample = 0; sample < samples; sample++) {
     QuadraticModel leastSquares(N, M, r, σ, A, J);
-    x = findMinimum(leastSquares, x);
-    std::cout << leastSquares.getHamiltonian(x) / N << std::flush;
+    x = gradientDescent(leastSquares, x);
+    std::cout << leastSquares.getHamiltonian(x) / N;
 
     QuadraticModel leastSquares2(N, M, r, σ, A, J);
     Vector σ = subagAlgorithm(leastSquares2, r, N, 15);
-    σ = findMinimum(leastSquares2, σ);
+    σ = gradientDescent(leastSquares2, σ);
     std::cout << " " << leastSquares2.getHamiltonian(σ) / N << std::endl;
   }
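
A note on the projection the patch leans on: makeTangent (called in getHamGrad, and applied term by term in getHamGradHess) projects a gradient onto the tangent space of the sphere x·x = N via v ↦ v − (v·x) x / N, matching the old inline expression dH - dH.dot(x) * x / (Real)N. The standalone sketch below checks that property; it assumes only Eigen, and makeTangentSketch is an illustrative stand-in, not the repository's makeTangent.

// Standalone sketch (not part of the patch): the projection v - (v·x) x / N
// leaves a vector orthogonal to x on the sphere x·x = N.
#include <cmath>
#include <iostream>
#include <Eigen/Dense>

using Vector = Eigen::VectorXd;

// Illustrative stand-in for the repository's makeTangent.
Vector makeTangentSketch(const Vector& v, const Vector& x) {
  return v - (v.dot(x) / x.size()) * x;
}

int main() {
  const int N = 10;
  // A point on the sphere |x|² = N, and an arbitrary stand-in for the gradient ∂H.
  Vector x = std::sqrt(N) * Vector::Random(N).normalized();
  Vector g = Vector::Random(N);

  Vector t = makeTangentSketch(g, x);
  std::cout << "x·t = " << x.dot(t) << " (should be ≈ 0 up to roundoff)\n";
}

On the new compile_flags.txt: clang-based tooling such as clangd reads these flags, and -Wno-mathematical-notation-identifier-extension silences Clang's warning about identifiers like ∂H and ∇H, which Clang accepts as an extension; plain ASCII names as in the sketch above compile without it.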
