#include <getopt.h>

#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <tuple>
#include <vector>

#include "pcg-cpp/include/pcg_random.hpp"
#include "randutils/randutils.hpp"
#include "eigen/Eigen/Dense"

using Rng = randutils::random_generator<pcg32>;

using Real = double;
using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;

/* Alternative basis: monomials xⁱ.
inline Real basisFunctions(unsigned N, unsigned i, Real x) {
  if (i == 0) {
    return 1;
  } else {
    return std::pow(x, i);
  }
}
*/

// Basis functions for the fit: Legendre polynomials, rescaled from [-1, 1] to [0, 1].
inline Real basisFunctions(unsigned N, unsigned i, Real x) {
  return std::legendre(i, 2.0 * (x - 0.5));
}

/* Alternative basis: absolute-value bumps centered on the sample points.
inline Real basisFunctions(unsigned N, unsigned i, Real x) {
  return std::abs(x - i / (N - 1.0));
}
*/

// Value of the fit with coefficients `coeffs` at the point x.
Real value(const Vector& coeffs, Real x) {
  Real v = 0;

  for (unsigned j = 0; j < coeffs.size(); j++) {
    v += coeffs[j] * basisFunctions(coeffs.size(), j, x);
  }

  return v;
}

// Least-squares cost of the fit `coeffs` over the data set.
Real cost(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs) {
  Real c = 0;

#pragma omp parallel for reduction(+:c)
  for (const std::tuple<Real, Real>& xy : data) {
    Real x = std::get<0>(xy);
    Real y = std::get<1>(xy);

    c += 0.5 * std::pow(value(coeffs, x) - y, 2);
  }

  return c;
}

// Gradient of the cost with respect to the coefficients.
Vector dCost(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs) {
  Vector dC = Vector::Zero(coeffs.size());

  for (const std::tuple<Real, Real>& xy : data) {
    Real x = std::get<0>(xy);
    Real y = std::get<1>(xy);
    Real Δc = value(coeffs, x) - y;

#pragma omp parallel for
    for (unsigned j = 0; j < coeffs.size(); j++) {
      dC[j] += Δc * basisFunctions(coeffs.size(), j, x);
    }
  }

  return dC;
}

// Gradient descent with a backtracking line search, stopping when the squared
// norm of the gradient falls below ε or after maxSteps steps.
Vector gradientDescent(const std::vector<std::tuple<Real, Real>>& data, const Vector& a₀, unsigned long maxSteps, Real ε = 1e-12) {
  Vector xₜ = a₀;
  Real Hₜ = cost(data, a₀);
  Real α = 1.0;
  Real m;
  Vector ∇H;
  unsigned long steps = 0;

  while (
    ∇H = dCost(data, xₜ),
    m = ∇H.squaredNorm(),
    m > ε && steps < maxSteps
  ) {
    Vector xₜ₊₁;
    Real Hₜ₊₁;

    // Halve the step size until the sufficient-decrease condition is met.
    while (
      xₜ₊₁ = xₜ - α * ∇H,
      Hₜ₊₁ = cost(data, xₜ₊₁),
      Hₜ₊₁ > Hₜ - 0.25 * α * m
    ) {
      α /= 2;
    }

    xₜ = xₜ₊₁;
    Hₜ = Hₜ₊₁;
    α *= 1.25;
    steps++;
  }

  return xₜ;
}

// Stochastic estimate of the cost gradient, using each data point with probability batchP.
Vector dCostRand(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs, Real batchP, Rng& r) {
  Vector dC = Vector::Zero(coeffs.size());

  for (unsigned j = 0; j < coeffs.size(); j++) {
    for (const std::tuple<Real, Real>& xy : data) {
      if (batchP > r.uniform(0.0, 1.0)) {
        Real x = std::get<0>(xy);
        Real y = std::get<1>(xy);

        dC[j] += (value(coeffs, x) - y) * basisFunctions(coeffs.size(), j, x);
      }
    }
  }

  return dC;
}

// Stochastic gradient descent with a fixed step size.
Vector stochasticGradientDescent(std::vector<std::tuple<Real, Real>>& data, const Vector& a₀, Rng& r, unsigned long maxSteps, Real ε = 1e-12) {
  Vector xₜ = a₀;
  Real α = 1e-3;
  Real m;
  Vector ∇H;
  unsigned long steps = 0;

  while (
    ∇H = dCostRand(data, xₜ, 0.1, r),
    m = ∇H.squaredNorm(),
    m > ε && steps < maxSteps
  ) {
    xₜ = xₜ - α * ∇H;
    steps++;
  }

  return xₜ;
}

// Generate M evenly spaced samples of f on [0, 1] with Gaussian noise of standard deviation σ.
std::vector<std::tuple<Real, Real>> generateData(Real (*f)(Real), unsigned M, Real σ, Rng& r) {
  std::vector<std::tuple<Real, Real>> data;
  data.reserve(M);

  for (unsigned i = 0; i < M; i++) {
    Real x = ((Real)i) / (M - 1.0);
    data.push_back({x, f(x) + r.variate<Real>(0, σ)});
  }

  return data;
}

int main(int argc, char* argv[]) {
  unsigned N = 10;                // number of basis functions
  unsigned M = 10;                // number of data points
  Real σ = 0.2;                   // noise standard deviation
  long unsigned maxSteps = 1e12;  // maximum gradient descent steps

  int opt;

  while ((opt = getopt(argc, argv, "N:M:s:S:")) != -1) {
    switch (opt) {
    case 'N':
      N = (unsigned)atof(optarg);
      break;
    case 'M':
      M = (unsigned)atof(optarg);
      break;
    case 's':
      σ = atof(optarg);
      break;
    case 'S':
      maxSteps = (long unsigned)atof(optarg);
      break;
    default:
      exit(1);
    }
  }

  Rng r;

  std::vector<std::tuple<Real, Real>> data = generateData([] (Real x) { return std::cos(2 * M_PI * x); }, M, σ, r);

  std::cout << std::setprecision(15);

  for (std::tuple<Real, Real> datum : data) {
    std::cout << std::get<0>(datum) << " " << std::get<1>(datum) << " ";
  }
  std::cout << std::endl;

  Vector a₀ = Vector::Zero(N);
  for (Real& aa : a₀) {
    aa = r.variate<Real>(0, 0);
  }

  Vector a = gradientDescent(data, a₀, maxSteps);

  for (unsigned i = 0; i < N; i++) {
    std::cout << a[i] << " ";
  }
  std::cout << std::endl;

  return 0;
}
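
// A usage sketch, not part of the original source: the file name (fit.cpp) and
// the compiler invocation below are assumptions. Building needs C++17 (for
// std::legendre), OpenMP, a compiler that accepts UTF-8 identifiers, and the
// pcg-cpp, randutils, and eigen checkouts at the paths given in the includes.
//
//   g++ -std=c++17 -O2 -fopenmp fit.cpp -o fit
//   ./fit -N 10 -M 20 -s 0.1 -S 1000000 > fit.dat
//
// -N sets the number of Legendre coefficients, -M the number of noisy samples
// of cos(2πx), -s the noise level σ, and -S the maximum number of gradient
// descent steps. The first output line is the sampled x y pairs; the second is
// the fitted coefficients.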