From c60430fc1c5d90ae06d1fd019257474c8f395bef Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 5 Nov 2021 17:31:10 +0100 Subject: Lots of progress towards Hessian implementation. --- langevin.cpp | 4 +- p-spin.hpp | 39 +++++++++++++ stokes.hpp | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tensor.hpp | 12 ++++ 4 files changed, 229 insertions(+), 2 deletions(-) diff --git a/langevin.cpp b/langevin.cpp index fe26332..d23cb33 100644 --- a/langevin.cpp +++ b/langevin.cpp @@ -194,8 +194,8 @@ int main(int argc, char* argv[]) { */ - Cord test(J, zSaddle, zSaddleNext, 5); - test.relax(J, 20, 1, 1e5); + Cord test(J, zSaddle, zSaddleNext, 2); + test.relaxNewton(J, 20, 1, 1e4); std::cout << test.z0.transpose() << std::endl; std::cout << test.z1.transpose() << std::endl; diff --git a/p-spin.hpp b/p-spin.hpp index e5cd195..50da209 100644 --- a/p-spin.hpp +++ b/p-spin.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include "types.hpp" #include "tensor.hpp" @@ -104,3 +105,41 @@ Matrix dzDotConjugate(const Vector& z, const Vector& dH, Matrix::Identity(ddH.rows(), ddH.cols()) - z.conjugate() * z.transpose() / z² ); } + +template +Tensor ddzDot(const Vector& z, const Vector& dH) { + Tensor zT = Eigen::TensorMap>(z.data(), {z.size()}); + Tensor dHT = Eigen::TensorMap>(dH.data(), {dH.size()}); + + Eigen::array, 0> ei = {}; + + Scalar z² = z.squaredNorm(); + + return - zT.conjugate().contract(dHT.conjugate(), ei).contract(zT.conjugate(), ei) / pow(z², 2) + - dHT.conjugate().contract(zT.conjugate(), ei).contract(zT.conjugate(), ei) / pow(z², 2) + + zT.conjugate().contract(zT.conjugate(), ei).contract(zT.conjugate(), ei) * (2.0 * dH.dot(z) / pow(z², 3)); +} + +template +Tensor ddzDotConjugate(const Vector& z, const Vector& dH, const Matrix& ddH, const Tensor& dddH) { + Tensor zT = Eigen::TensorMap>(z.data(), {z.size()}); + Tensor dHT = Eigen::TensorMap>(dH.data(), {dH.size()}); + Tensor ddHT = Eigen::TensorMap>(ddH.data(), {z.size(), z.size()}); + + Matrix I = Matrix::Identity(z.size(), z.size()); + Tensor IT = Eigen::TensorMap>(I.data(), {z.size(), z.size()}); + + Eigen::array, 0> ei = {}; + Eigen::array, 1> ip00 = {Eigen::IndexPair(0, 0)}; + + Scalar z² = z.squaredNorm(); + + return - dddH + dddH.contract(zT.conjugate(), ip00).contract(zT, ei) / z² + + IT.contract(ddHT.contract(zT.conjugate(), ip00), ei).shuffle((std::array){0, 2, 1}) / z² + + ddHT.contract(zT.conjugate(), ip00).contract(IT, ei) / z² + - zT.conjugate().contract(ddHT.contract(zT.conjugate(), ip00), ei).contract(zT, ei) / pow(z², 2) + - zT.conjugate().contract(IT, ei) * (z.dot(dH) / pow(z², 2)) + - IT.contract(zT.conjugate(), ei).shuffle((std::array){0, 2, 1}) * (z.dot(dH) / pow(z², 2)) + - ddHT.contract(zT.conjugate(), ip00).contract(zT.conjugate(), ei).contract(zT, ei) / pow(z², 2) + + zT.conjugate().contract(zT.conjugate(), ei).contract(zT, ei) * (2.0 * z.dot(dH) / pow(z², 3)); +} diff --git a/stokes.hpp b/stokes.hpp index b2694a9..948a484 100644 --- a/stokes.hpp +++ b/stokes.hpp @@ -332,6 +332,168 @@ public: return x; } + template + std::pair, Matrix> gsGradHess(const Tensor& J, Real t) const { + Vector z = f(t); + auto [H, dH, ddH] = hamGradHess(J, z); + Tensor dddH = ((p - 2) * (p - 1) * p / (Real)factorial(p)) * J; + Vector ż = zDot(z, dH); + Tensor żT = Eigen::TensorMap>(ż.data(), {z.size()}); + Vector dz = df(t); + Tensor dzT = Eigen::TensorMap>(dz.data(), {z.size()}); + Matrix dż = dzDot(z, dH); + Matrix dżc = dzDotConjugate(z, dH, ddH); + Tensor ddż = ddzDot(z, dH); + Tensor ddżc = ddzDotConjugate(z, dH, ddH, dddH); + + Eigen::array, 1> ip20 = {Eigen::IndexPair(2, 0)}; + + Tensor ddżcdzT = ddżc.contract(dzT, ip20); + Matrix ddżcdz = Eigen::Map>(ddżcdzT.data(), z.size(), z.size()); + + Tensor ddżdzT = ddż.contract(dzT.conjugate(), ip20); + Matrix ddżdz = Eigen::Map>(ddżcdzT.data(), z.size(), z.size()); + + Tensor ddżcżT = ddżc.contract(żT, ip20); + Matrix ddżcż = Eigen::Map>(ddżcżT.data(), z.size(), z.size()); + + Tensor ddżżcT = ddż.contract(żT.conjugate(), ip20); + Matrix ddżżc = Eigen::Map>(ddżżcT.data(), z.size(), z.size()); + + Vector x(z.size() * gs.size()); + Matrix M(x.size(), x.size()); + + for (unsigned i = 0; i < gs.size(); i++) { + Real fdg = gCoeff(i, t); + Real dfdg = dgCoeff(i, t); + Vector dC = - 0.5 / ż.norm() / dz.norm() * ( + dfdg * ż.conjugate() + fdg * dżc * dz + fdg * dż * dz.conjugate() + - real(dz.dot(ż)) * ( + dfdg * dz.conjugate() / dz.squaredNorm() + + fdg * (dżc * ż + dż * ż.conjugate()) / ż.squaredNorm() + ) + ); + + for (unsigned j = 0; j < dC.size(); j++) { + x(z.size() * i + j) = dC(j); + } + + for (unsigned j = 0; j < gs.size(); j++) { + Real fdgm = gCoeff(j, t); + Real dfdgm = dgCoeff(j, t); + Vector dCm = - 0.5 / ż.norm() / dz.norm() * ( + dfdgm * ż.conjugate() + fdgm * dżc * dz + fdgm * dż * dz.conjugate() + - real(dz.dot(ż)) * ( + dfdgm * dz.conjugate() / dz.squaredNorm() + + fdgm * (dżc * ż + dż * ż.conjugate()) / ż.squaredNorm() + ) + ); + Matrix ddC = + - 0.5 / ż.norm() / dz.norm() * ( + dfdg * fdgm * dżc.transpose() + dfdgm * fdg * dżc + + fdg * fdgm * ddżcdz + fdg * fdgm * ddżdz + ) + + 0.5 / dz.squaredNorm() * ( + dfdg * dz.conjugate() * dCm.transpose() + + dfdgm * dC * dz.adjoint() + ) + + 0.5 / ż.squaredNorm() * ( + fdg * (dżc * ż + dż * ż.conjugate()) * dCm.transpose() + + fdgm * dC * (dżc * ż + dż * ż.conjugate()).transpose() + ) + + 0.5 / ż.norm() / dz.norm() * real(dz.dot(ż)) * fdg * fdgm * ( + ddżżc + ddżcż + dżc * dż.transpose() + dż * dżc.transpose() + ) / ż.squaredNorm() + - 0.5 / ż.norm() / dz.norm() * real(dz.dot(ż)) * dfdg * dfdgm * dz.conjugate() * dz.adjoint() / pow(dz.squaredNorm(), 2) + - 0.5 / ż.norm() / dz.norm() * real(dz.dot(ż)) / pow(ż.squaredNorm(), 2) + * fdg * (dżc * ż + dż * ż.conjugate()) + * fdgm * (dżc * ż + dż * ż.conjugate()).transpose() + ; + + for (unsigned k = 0; k < z.size(); k++) { + for (unsigned l = 0; l < z.size(); l++) { + M(z.size() * i + k, z.size() * j + l) = ddC(k, l); + } + } + } + } + + Cord cNew(*this); + Matrix Mf = Matrix::Zero(x.size(), x.size()); + for (unsigned i = 0; i < x.size(); i++) { + for (unsigned j = 0; j < x.size(); j++) { + Scalar hi = std::complex(0.03145, 0.08452); + Scalar hj = std::complex(0.09483, 0.02453); + Scalar c0 = cost(J, t); + cNew.gs[i / z.size()](i % z.size()) += hi; + Scalar ci = cNew.cost(J, t); + cNew.gs[j / z.size()](j % z.size()) += hj; + Scalar cij = cNew.cost(J, t); + cNew.gs[i / z.size()](i % z.size()) -= hi; + Scalar cj = cNew.cost(J, t); + cNew.gs[j / z.size()](j % z.size()) -= hj; + + Mf(i, j) = (cij - ci - cj + c0) / (hi * hj); + } + } + + std::cout << M << std::endl; + std::cout << Mf << std::endl; + getchar(); + + return {x, M}; + } + + template + std::pair relaxStepNewton(const Tensor& J, unsigned nt, Real δ₀) { + Vector dgsTot = Vector::Zero(z0.size() * gs.size()); + Matrix HessTot = Matrix::Zero(z0.size() * gs.size(), z0.size() * gs.size()); + + for (unsigned i = 0; i < nt; i++) { + Real t = (i + 1.0) / (nt + 1.0); + auto [dgsi, Hessi] = gsGradHess(J, t); + + dgsTot += dgsi / nt; + HessTot += Hessi / nt; + } + + Real stepSize = dgsTot.norm(); + + Cord cNew(*this); + + Real δ = δ₀; + Real oldCost = totalCost(J, nt); + + Vector δg = HessTot.partialPivLu().solve(dgsTot); + + for (unsigned i = 0; i < gs.size(); i++) { + for (unsigned j = 0; j < z0.size(); j++) { + cNew.gs[i](j) = gs[i](j) - δg[z0.size() * i + j]; + } + } + + Real newCost = cNew.totalCost(J, nt); + if (newCost < oldCost) { + std::cout << "Newton step accepted!" << std::endl; + } + + while (newCost > oldCost) { + for (unsigned i = 0; i < gs.size(); i++) { + for (unsigned j = 0; j < z0.size(); j++) { + cNew.gs[i](j) = gs[i](j) - δ * conj(dgsTot[z0.size() * i + j]); + } + } + + newCost = cNew.totalCost(J, nt); + + δ /= 2; + } + + gs = cNew.gs; + + return {2*δ, stepSize}; + } + template std::pair relaxStep(const Tensor& J, unsigned nt, Real δ₀) { std::vector> dgsTot(gs.size(), Vector::Zero(z0.size())); @@ -380,6 +542,20 @@ public: while (stepSize > 1e-7 && steps < maxSteps) { std::tie(δ, stepSize) = relaxStep(J, nt, δ); std::cout << totalCost(J, nt) << " " << δ << " " << stepSize << std::endl; + δ *= 2; + steps++; + } + } + + template + void relaxNewton(const Tensor& J, unsigned nt, Real δ₀, unsigned maxSteps) { + Real δ = δ₀; + Real stepSize = std::numeric_limits::infinity(); + unsigned steps = 0; + while (stepSize > 1e-7 && steps < maxSteps) { + std::tie(δ, stepSize) = relaxStepNewton(J, nt, δ); + std::cout << totalCost(J, nt) << " " << δ << " " << stepSize << std::endl; + δ *= 2; steps++; } } diff --git a/tensor.hpp b/tensor.hpp index 1d90c78..aa33069 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -123,3 +123,15 @@ Matrix contractDown(const Tensor& J, const Vector& z) Tensor Jz = J.contract(zT, ip00); return contractDown(Jz, z); } + +template +Tensor contractDownTo(const Tensor& J, const Vector& z) { + return J; +} + +template +Tensor contractDownTo(const Tensor& J, const Vector& z) { + Tensor zT = Eigen::TensorMap>(z.data(), {z.size()}); + Tensor Jz = J.contract(zT, ip00); + return contractDownTo(Jz, z); +} -- cgit v1.2.3-70-g09d2