author    Jaron Kent-Dobias <jaron@kent-dobias.com>    2025-02-13 17:45:27 -0300
committer Jaron Kent-Dobias <jaron@kent-dobias.com>    2025-02-13 17:45:27 -0300
commit    e4c01a637e4cb782ff9c67d9a42c54bb62a135d6 (patch)
tree      9106ffee7a1a5ea8be8e988c3bb54a7085f00959 /ictp-saifr_colloquium.tex
parent    ba0e51eed6abb4f134fe60fc6bf5d903aa86d9b7 (diff)
More work, figures
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r--  ictp-saifr_colloquium.tex  105
1 file changed, 91 insertions(+), 14 deletions(-)
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex
index bedb05a..d883ed8 100644
--- a/ictp-saifr_colloquium.tex
+++ b/ictp-saifr_colloquium.tex
@@ -433,6 +433,8 @@
\[
\chi^2_\text{eff}(\pmb a\mid\text{data})=\chi^2(\pmb a\mid\text{data})+\lambda\|\pmb a\|^2
\]
+
+ \tiny\fullcite{Neyshabur_2017_Implicit}
\end{column}
\begin{column}{0.5\textwidth}
\begin{overprint}
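
A note on the objective in this hunk: in the linear case $\chi^2(\pmb a)=\|A\pmb a-\pmb b\|^2$, the ridge penalty $\lambda\|\pmb a\|^2$ gives a closed-form minimizer whose $\lambda\to0^+$ limit is the minimum-norm interpolator, which is the implicit-regularization point of the Neyshabur et al. citation. A sketch of the standard identity (the linear specialization is illustrative, not taken from the slide):

\[
  \pmb a^*_\lambda=\left(A^\top A+\lambda I\right)^{-1}A^\top\pmb b,
  \qquad
  \lim_{\lambda\to0^+}\pmb a^*_\lambda=A^+\pmb b,
\]

where $A^+$ is the Moore--Penrose pseudoinverse.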
@@ -818,6 +820,11 @@
Approximated gradient takes \emph{fewer} steps to find \emph{better} solutions
\end{column}
\begin{column}{0.5\textwidth}
+ \includegraphics[width=\textwidth]{figs/beneventano_2023.png}
+
+ \medskip\tiny
+
+ \fullcite{Beneventano_2023_On}
\end{column}
\end{columns}
\end{frame}
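
The claim above, that the approximated (minibatch) gradient takes fewer steps to find better solutions, can be probed with a toy experiment. The sketch below is illustrative only and is not the setup of the Beneventano et al. citation; the hyperparameters (eta, batch, problem sizes) are arbitrary choices, and which run wins varies with them.

# Toy comparison of full-batch gradient descent and minibatch SGD
# on an overparameterized linear least-squares problem (M < N).
# Illustrative sketch only; hyperparameters are arbitrary.
import numpy as np

rng = np.random.default_rng(0)
M, N = 50, 200                        # fewer data points than parameters
A = rng.normal(size=(M, N)) / np.sqrt(N)
b = rng.normal(size=M)

def loss(a):
    return np.sum((A @ a - b) ** 2)

def grad(a, idx):
    r = A[idx] @ a - b[idx]           # residuals on the chosen (mini)batch
    return 2 * A[idx].T @ r / len(idx)

eta, steps, batch = 0.5, 2000, 5
a_gd, a_sgd = np.zeros(N), np.zeros(N)
for t in range(steps):
    a_gd -= eta * grad(a_gd, np.arange(M))             # exact gradient
    a_sgd -= eta * grad(a_sgd, rng.choice(M, batch))   # minibatch estimate

print(f"GD : loss={loss(a_gd):.2e}  |a|={np.linalg.norm(a_gd):.3f}")
print(f"SGD: loss={loss(a_sgd):.2e}  |a|={np.linalg.norm(a_sgd):.3f}")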
@@ -851,17 +858,32 @@
\frametitle{Machine learning is just \emph{non}linear least squares}
\begin{columns}
\begin{column}{0.5\textwidth}
- Structure and geometry of manifold of "perfect" solutions integral to understanding overparameterized fits
+ Structure and geometry of manifold of ``perfect'' solutions integral to understanding overparameterized fits
\medskip
- \textbf{BUT:} extremely little is known outside of the linear case
+ \textbf{BUT:} little is known outside the linear case
\medskip
- State of the art: sample two points, see if the line between them are also solutions
+ State of the art: sample two points, relax an elastic line between them
+
+ \bigskip
+
+ \textbf{Can we develop better ways to understand the solution space in nonlinear problems?}
\end{column}
\begin{column}{0.5\textwidth}
+ \includegraphics[width=0.52\textwidth]{figs/garipov_2018.png}
+ \hfill
+ \includegraphics[width=0.46\textwidth]{figs/draxler_2018.png}
+
+ \tiny\medskip
+
+ \fullcite{Garipov_2018_Loss}
+
+ \medskip
+
+ \fullcite{Draxler_2018_Essentially}
\end{column}
\end{columns}
\end{frame}
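
For concreteness, the "relax an elastic line" step can be sketched on a toy landscape: discretize a path between two known minima into beads, then descend the loss on each interior bead while a spring term keeps the line connected. This is a simplified sketch in the spirit of the Garipov and Draxler citations, not their exact algorithms; the loss function, spring constant, and step counts are invented for illustration.

# Minimal elastic-line relaxation between two minima of a 2D toy loss.
import numpy as np

def loss(x):   # toy double-well: minima at (-1, 1) and (1, 1)
    return (x[..., 0] ** 2 - 1) ** 2 + (x[..., 1] - x[..., 0] ** 2) ** 2

def grad(x, eps=1e-5):   # numerical gradient, adequate for a 2D toy
    g = np.zeros_like(x)
    for i in range(x.shape[-1]):
        dx = np.zeros(x.shape[-1])
        dx[i] = eps
        g[..., i] = (loss(x + dx) - loss(x - dx)) / (2 * eps)
    return g

K = 20                                          # beads along the line
path = np.linspace([-1.0, 1.0], [1.0, 1.0], K)  # straight initial guess

eta, k_spring = 0.01, 1.0
for _ in range(5000):
    spring = path[:-2] + path[2:] - 2 * path[1:-1]   # discrete Laplacian
    path[1:-1] += eta * (k_spring * spring - grad(path[1:-1]))

print("estimated barrier along relaxed line:", loss(path).max())

The straight initial line has a barrier of 2 at its midpoint; the relaxed line sags into the curved valley of this toy loss, lowering the estimated barrier, which is the qualitative phenomenon the cited papers report for real networks.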
@@ -901,15 +923,71 @@
\begin{column}{0.33\textwidth}
\textbf{When does \boldmath{$\chi^2$} have any solutions?}
- \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that can interfere with optimization?}
+ \medskip
+
+ \tiny
+ \fullcite{Fyodorov_2019_A}
+
+ \smallskip
+
+ \fullcite{Fyodorov_2020_Counting}
+
+ \smallskip
+
+ \fullcite{Fyodorov_2022_Optimization}
+
+ \smallskip
+
+ \fullcite{Montanari_2024_On}
\end{column}
\begin{column}{0.33\textwidth}
+ \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that hinder optimization?}
+
+ \medskip
+
+ \tiny\fullcite{Kent-Dobias_2024_Conditioning}
+
+ \medskip
+
+ \normalsize
\textbf{How does gradient descent behave?}
- \textbf{How does stochastic gradient descent behave?}
+ \medskip
+
+ \tiny
+ \fullcite{Urbani_2023_A}
+
+ \smallskip
+
+ \fullcite{Kamali_2023_Dynamical}
+
+ \smallskip
+
+ \fullcite{Montanari_2023_Solving}
+
+ \normalsize
\end{column}
\begin{column}{0.33\textwidth}
+ \textbf{How does stochastic gradient descent behave?}
+
+ \medskip
+
+ \tiny
+ \fullcite{Kamali_2023_Stochastic}
+
+ \bigskip
+
+ \normalsize
\textbf{What does the manifold of perfect solutions look like?}
+
+ \medskip
+
+ This presentation!
+
+ \medskip
+
+ \tiny
+ \fullcite{Kent-Dobias_2024_On}
\end{column}
\end{columns}
\end{frame}
@@ -917,7 +995,7 @@
\begin{frame}
\frametitle{A simple model of nonlinear least squares}
\begin{columns}
- \begin{column}{0.65\textwidth}
+ \begin{column}{0.85\textwidth}
Solutions form $D=N-M-1$ dimensional manifold:
\[
\Omega=\left\{
@@ -937,15 +1015,13 @@
\smallskip
- \tiny\fullcite{Kent-Dobias_2022_How}
+ \tiny\fullcite{Kent-Dobias_2023_How}
\normalsize
\medskip
\textbf{Today:} how to compute the \emph{Euler characteristic} of the solution manifold
\end{column}
- \begin{column}{0.35\textwidth}
- \end{column}
\end{columns}
\end{frame}
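
The $D=N-M-1$ count on this slide follows from genericity, assuming, as in the cited Kent-Dobias paper though not visible in this hunk, that the parameters are confined to the sphere $\|\pmb a\|^2=N$: each of the $M$ equations $V_k(\pmb a)=0$ generically removes one dimension from the $(N-1)$-dimensional sphere,

\[
  D=(N-1)-M=N-M-1.
\]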
@@ -1074,7 +1150,8 @@
\end{frame}
\begin{frame}
- \frametitle{Computing the Euler characteristic of level sets}
+ \frametitle{The Euler characteristic \boldmath{$\chi$}}
+ \framesubtitle{Computing the Euler characteristic}
\begin{columns}
\begin{column}{0.6\textwidth}
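
The retitled frame concerns computing $\chi$. The standard Morse-theoretic route, presumably what the elided slide body develops, counts the critical points of any Morse function $h$ restricted to the solution manifold $\Omega$, weighted by sign:

\[
  \chi(\Omega)=\sum_{\pmb a\in\Omega\,:\,\nabla h(\pmb a)=0}(-1)^{\operatorname{index}\operatorname{Hess}h(\pmb a)},
\]

where the index is the number of negative Hessian eigenvalues. The identity is classical; whether the slides use exactly this formulation is an assumption.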
@@ -1118,7 +1195,7 @@
\framesubtitle{Results}
\begin{columns}
\begin{column}{0.5\textwidth}
- $\alpha=M/N$, $\alpha<1$ is overparameterized
+ $M$ data points, $N$ parameters, $\alpha=M/N$
\[
V_0=\hat f(J\mid \pmb a)
=\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
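
The random polynomial $V_0$ in this hunk is a $p$-fold tensor contraction that is direct to evaluate numerically; a sketch for $p=3$ follows. The normalization of $J$ and the spherical normalization of $\pmb a$ are typical spin-glass conventions assumed here, not read off the slide.

# Evaluate V_0 = sum_{i_1..i_p} J_{i_1..i_p} a_{i_1} ... a_{i_p} for p = 3.
import numpy as np

rng = np.random.default_rng(1)
N, p = 100, 3
# Gaussian couplings; the 1/N^((p-1)/2) scaling keeps V_0 of order sqrt(N)
# for a on the sphere |a|^2 = N (a common convention, assumed here).
J = rng.normal(size=(N,) * p) / N ** ((p - 1) / 2)

a = rng.normal(size=N)
a *= np.sqrt(N) / np.linalg.norm(a)   # spherical normalization |a|^2 = N

# p-fold contraction of J with p copies of a
V0 = np.einsum('ijk,i,j,k->', J, a, a, a)
print("V_0 =", V0)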
@@ -1152,7 +1229,7 @@
\framesubtitle{Results}
\begin{columns}
\begin{column}{0.5\textwidth}
- $\alpha=M/N$, $\alpha<1$ is overparameterized
+ $M$ data points, $N$ parameters, $\alpha=M/N$
\[
V_0=\hat f(J\mid \pmb a)
=\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
@@ -1226,11 +1303,11 @@
\begin{column}{0.5\textwidth}
\textbf{How do these structures interact with dynamics?}
- \medskip
+ \bigskip
\textbf{What structures exist in a problem with a non-random ground truth?}
- \medskip
+ \bigskip
\textbf{How do topological properties of solutions correlate with their quality?}
\end{column}