Implemented SGD

author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2025-02-10 17:21:09 -0300
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2025-02-10 17:21:09 -0300
commit: c306ba29738d574851103e71ffb04354ec008fe8 (patch)
tree: 451ed001ba9152f6593d821f7b4f31830a70e335 /fits.cpp
parent: ad13e073a3b80db7185299dbe0c557dacab6b8cf (diff)
download: ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.tar.gz
ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.tar.bz2
ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.zip
1 files changed, 49 insertions, 65 deletions
diff --git a/fits.cpp b/fits.cpp
index 9ca9c2c..7ee9c64 100644
--- a/fits.cpp
+++ b/fits.cpp
@@ -1,6 +1,5 @@
 #include <getopt.h>
 #include <iomanip>
-#include <list>
 
 #include "pcg-cpp/include/pcg_random.hpp"
 #include "randutils/randutils.hpp"
@@ -11,26 +10,19 @@ using Rng = randutils::random_generator<pcg32>;
 
 using Real = double;
 using Vector = Eigen::Matrix<Real, Eigen::Dynamic, 1>;
+using Data = std::vector<std::tuple<Real, Real>>;
 
-/*
-inline Real basisFunctions(unsigned N, unsigned i, Real x) {
-  if (i == 0) {
-    return 1;
-  } else {
-    return pow(x, i);
-  }
-}
-*/
-
+#ifndef FITS_ABS_BASIS
 inline Real basisFunctions(unsigned N, unsigned i, Real x) {
   return std::legendre(i, 2.0 * (x - 0.5));
 }
+#endif
 
-/*
+#ifdef FITS_ABS_BASIS
 inline Real basisFunctions(unsigned N, unsigned i, Real x) {
   return abs(x - i / (N - 1.0));
 }
-*/
+#endif
 
 Real value(const Vector& coeffs, Real x) {
   Real v = 0;
@@ -42,25 +34,25 @@ Real value(const Vector& coeffs, Real x) {
   return v;
 }
 
-Real cost(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs) {
+Real cost(Data::const_iterator batchBegin, Data::const_iterator batchEnd, const Vector& coeffs) {
   Real c = 0;
 
 #pragma omp parallel for reduction(+:c)
-  for (const std::tuple<Real, Real>& xy : data) {
-    Real x = std::get<0>(xy);
-    Real y = std::get<1>(xy);
+  for (Data::const_iterator it = batchBegin; it != batchEnd; it++) {
+    Real x = std::get<0>(*it);
+    Real y = std::get<1>(*it);
     c += 0.5 * pow(value(coeffs, x) - y, 2);
   }
 
   return c;
 }
 
-Vector dCost(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs) {
+Vector dCost(Data::const_iterator batchBegin, Data::const_iterator batchEnd, const Vector& coeffs) {
   Vector dC = Vector::Zero(coeffs.size());
 
-  for (const std::tuple<Real, Real>& xy : data) {
-    Real x = std::get<0>(xy);
-    Real y = std::get<1>(xy);
+  for (Data::const_iterator it = batchBegin; it != batchEnd; it++) {
+    Real x = std::get<0>(*it);
+    Real y = std::get<1>(*it);
     Real Δc = value(coeffs, x) - y;
 
 #pragma omp parallel for
@@ -72,73 +64,56 @@ Vector dCost(const std::vector<std::tuple<Real, Real>>& data, const Vector& coef
   return dC;
 }
 
-Vector gradientDescent(const std::vector<std::tuple<Real, Real>>& data, const Vector& a₀, unsigned maxSteps, Real ε = 1e-12) {
+Vector stochasticGradientDescent(const Data& data, const Vector& a₀, unsigned nBatches, long unsigned maxSteps, Real ε = 1e-12) {
   Vector xₜ = a₀;
-  Real Hₜ = cost(data, a₀);
+  Real Hₜ;
   Real α = 1.0;
   Real m;
   Vector ∇H;
-  unsigned steps = 0;
+  long unsigned steps = 0;
+
+  unsigned batchSize = data.size() / nBatches;
+  Data::const_iterator batchBegin = data.begin();
+  Data::const_iterator batchEnd = data.begin() + batchSize;
+  Data::const_iterator effectiveEnd = data.begin() + batchSize * nBatches;
 
   while (
-    ∇H = dCost(data, xₜ), m = ∇H.squaredNorm(),
+    Hₜ = cost(batchBegin, batchEnd, xₜ),
+    ∇H = dCost(batchBegin, batchEnd, xₜ),
+    m = ∇H.squaredNorm(),
     m > ε && steps < maxSteps
   ) {
     Vector xₜ₊₁;
     Real Hₜ₊₁;
 
     while (
-      xₜ₊₁ = xₜ - α * ∇H, Hₜ₊₁ = cost(data, xₜ₊₁),
+      xₜ₊₁ = xₜ - α * ∇H, Hₜ₊₁ = cost(batchBegin, batchEnd, xₜ₊₁),
       Hₜ₊₁ > Hₜ - 0.25 * α * m
     ) {
       α /= 2;
     }
 
     xₜ = xₜ₊₁;
-    Hₜ = Hₜ₊₁;
     α *= 1.25;
     steps++;
-  }
-
-  return xₜ;
-}
-
-Vector dCostRand(const std::vector<std::tuple<Real, Real>>& data, const Vector& coeffs, Real batchP, Rng& r) {
-  Vector dC = Vector::Zero(coeffs.size());
 
-  for (unsigned j = 0; j < coeffs.size(); j++) {
-    for (const std::tuple<Real, Real>& xy : data) {
-      if (batchP > r.uniform(0.0, 1.0)) {
-        Real x = std::get<0>(xy);
-        Real y = std::get<1>(xy);
-        dC[j] += (value(coeffs, x) - y) * basisFunctions(coeffs.size(), j, x);
-      }
+    if (batchEnd == data.end()) {
+      batchBegin = data.begin();
+      batchEnd = data.begin() + batchSize;
+    } else if (batchEnd == effectiveEnd) {
+      batchBegin = effectiveEnd;
+      batchEnd = data.end();
+    } else {
+      batchBegin += batchSize;
+      batchEnd += batchSize;
     }
   }
 
-  return dC;
-}
-
-Vector stochasticGradientDescent(std::vector<std::tuple<Real, Real>>& data, const Vector& a₀, Rng& r, unsigned maxSteps, Real ε = 1e-12) {
-  Vector xₜ = a₀;
-  Real α = 1e-3;
-  Real m;
-  Vector ∇H;
-  unsigned steps = 0;
-
-  while (
-    ∇H = dCostRand(data, xₜ, 0.1, r), m = ∇H.squaredNorm(),
-    m > ε && steps < maxSteps
-  ) {
-    xₜ = xₜ - α * ∇H;
-    steps++;
-  }
-
   return xₜ;
 }
 
-std::vector<std::tuple<Real, Real>> generateData(Real(*f)(Real), unsigned M, Real σ, Rng& r) {
-  std::vector<std::tuple<Real, Real>> data;
+Data generateData(Real(*f)(Real), unsigned M, Real σ, Rng& r) {
+  Data data;
   data.reserve(M);
 
   for (unsigned i = 0; i < M; i++) {
@@ -152,12 +127,14 @@ std::vector<std::tuple<Real, Real>> generateData(Real(*f)(Real), unsigned M, Rea
 int main(int argc, char* argv[]) {
   unsigned N = 10;
   unsigned M = 10;
+  unsigned nBatches = 5;
   Real σ = 0.2;
+  Real iniVar = 0.0;
   long unsigned maxSteps = 1e12;
 
   int opt;
 
-  while ((opt = getopt(argc, argv, "N:M:s:S:")) != -1) {
+  while ((opt = getopt(argc, argv, "N:M:s:S:B:i:")) != -1) {
     switch (opt) {
     case 'N':
       N = (unsigned)atof(optarg);
@@ -171,6 +148,12 @@ int main(int argc, char* argv[]) {
     case 'S':
       maxSteps = (long unsigned)atof(optarg);
       break;
+    case 'B':
+      nBatches = (unsigned)atof(optarg);
+      break;
+    case 'i':
+      iniVar = atof(optarg);
+      break;
     default:
       exit(1);
     }
@@ -178,7 +161,7 @@ int main(int argc, char* argv[]) {
 
   Rng r;
 
-  std::vector<std::tuple<Real, Real>> data = generateData([](Real x) {return std::cos(2 * M_PI * x);}, M, σ, r);
+  Data data = generateData([](Real x) {return std::sin(2 * M_PI * x);}, M, σ, r);
 
   std::cout << std::setprecision(15);
 
@@ -189,9 +172,10 @@ int main(int argc, char* argv[]) {
 
   Vector a₀ = Vector::Zero(N);
   for (Real& aa : a₀) {
-    aa = r.variate<Real, std::normal_distribution>(0, 0);
+    aa = r.variate<Real, std::normal_distribution>(0, iniVar);
   }
-  Vector a = gradientDescent(data, a₀, maxSteps);
+
+  Vector a = stochasticGradientDescent(data, a₀, nBatches, maxSteps);
 
   for (unsigned i = 0; i < N; i++) {
     std::cout << a[i] << " ";
author	Jaron Kent-Dobias <jaron@kent-dobias.com>	2025-02-10 17:21:09 -0300
committer	Jaron Kent-Dobias <jaron@kent-dobias.com>	2025-02-10 17:21:09 -0300
commit	c306ba29738d574851103e71ffb04354ec008fe8 (patch)
tree	451ed001ba9152f6593d821f7b4f31830a70e335 /fits.cpp
parent	ad13e073a3b80db7185299dbe0c557dacab6b8cf (diff)
download	ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.tar.gz ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.tar.bz2 ictp-saifr_colloquium-c306ba29738d574851103e71ffb04354ec008fe8.zip