diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2025-02-13 17:45:27 -0300 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2025-02-13 17:45:27 -0300 |
commit | e4c01a637e4cb782ff9c67d9a42c54bb62a135d6 (patch) | |
tree | 9106ffee7a1a5ea8be8e988c3bb54a7085f00959 /ictp-saifr_colloquium.tex | |
parent | ba0e51eed6abb4f134fe60fc6bf5d903aa86d9b7 (diff) | |
download | ictp-saifr_colloquium-e4c01a637e4cb782ff9c67d9a42c54bb62a135d6.tar.gz ictp-saifr_colloquium-e4c01a637e4cb782ff9c67d9a42c54bb62a135d6.tar.bz2 ictp-saifr_colloquium-e4c01a637e4cb782ff9c67d9a42c54bb62a135d6.zip |
More work, figures
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 105 |
1 files changed, 91 insertions, 14 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex index bedb05a..d883ed8 100644 --- a/ictp-saifr_colloquium.tex +++ b/ictp-saifr_colloquium.tex @@ -433,6 +433,8 @@ \[ \chi^2_\text{eff}(\pmb a\mid\text{data})=\chi^2(\pmb a\mid\text{data})+\lambda\|\pmb a\|^2 \] + + \tiny\fullcite{Neyshabur_2017_Implicit} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} @@ -818,6 +820,11 @@ Approximated gradient takes \emph{fewer} steps to find \emph{better} solutions \end{column} \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{figs/beneventano_2023.png} + + \medskip\tiny + + \fullcite{Beneventano_2023_On} \end{column} \end{columns} \end{frame} @@ -851,17 +858,32 @@ \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.5\textwidth} - Structure and geometry of manifold of "perfect" solutions integral to understanding overparameterized fits + Structure and geometry of manifold of ``perfect'' solutions integral to understanding overparameterized fits \medskip - \textbf{BUT:} extremely little is known outside of the linear case + \textbf{BUT:} little is known outside the linear case \medskip - State of the art: sample two points, see if the line between them are also solutions + State of the art: sample two points, relax an elastic line between them + + \bigskip + + \textbf{Can we develop better ways to understand the solution space in nonlinear problems?} \end{column} \begin{column}{0.5\textwidth} + \includegraphics[width=0.52\textwidth]{figs/garipov_2018.png} + \hfill + \includegraphics[width=0.46\textwidth]{figs/draxler_2018.png} + + \tiny\medskip + + \fullcite{Garipov_2018_Loss} + + \medskip + + \fullcite{Draxler_2018_Essentially} \end{column} \end{columns} \end{frame} @@ -901,15 +923,71 @@ \begin{column}{0.33\textwidth} \textbf{When does \boldmath{$\chi^2$} have any solutions?} - \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that can interfere with optimization?} + \medskip + + \tiny + \fullcite{Fyodorov_2019_A} + + \smallskip + + \fullcite{Fyodorov_2020_Counting} + + \smallskip + + \fullcite{Fyodorov_2022_Optimization} + + \smallskip + + \fullcite{Montanari_2024_On} \end{column} \begin{column}{0.33\textwidth} + \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that hinder optimization?} + + \medskip + + \tiny\fullcite{Kent-Dobias_2024_Conditioning} + + \medskip + + \normalsize \textbf{How does gradient descent behave?} - \textbf{How does stochastic gradient descent behave?} + \medskip + + \tiny + \fullcite{Urbani_2023_A} + + \smallskip + + \fullcite{Kamali_2023_Dynamical} + + \smallskip + + \fullcite{Montanari_2023_Solving} + + \normalsize \end{column} \begin{column}{0.33\textwidth} + \textbf{How does stochastic gradient descent behave?} + + \medskip + + \tiny + \fullcite{Kamali_2023_Stochastic} + + \bigskip + + \normalsize \textbf{What does the manifold of perfect solutions look like?} + + \medskip + + This presentation! + + \medskip + + \tiny + \fullcite{Kent-Dobias_2024_On} \end{column} \end{columns} \end{frame} @@ -917,7 +995,7 @@ \begin{frame} \frametitle{A simple model of nonlinear least squares} \begin{columns} - \begin{column}{0.65\textwidth} + \begin{column}{0.85\textwidth} Solutions form $D=N-M-1$ dimensional manifold: \[ \Omega=\left\{ @@ -937,15 +1015,13 @@ \smallskip - \tiny\fullcite{Kent-Dobias_2022_How} + \tiny\fullcite{Kent-Dobias_2023_How} \normalsize \medskip \textbf{Today:} how to compute the \emph{Euler characteristic} of the solution manifold \end{column} - \begin{column}{0.35\textwidth} - \end{column} \end{columns} \end{frame} @@ -1074,7 +1150,8 @@ \end{frame} \begin{frame} - \frametitle{Computing the Euler characteristic of level sets} + \frametitle{The Euler characteristic \boldmath{$\chi$}} + \framesubtitle{Computing the Euler characteristic} \begin{columns} \begin{column}{0.6\textwidth} @@ -1118,7 +1195,7 @@ \framesubtitle{Results} \begin{columns} \begin{column}{0.5\textwidth} - $\alpha=M/N$, $\alpha<1$ is overparameterized + $M$ data points, $N$ parameters, $\alpha=M/N$ \[ V_0=\hat f(J\mid \pmb a) =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p} @@ -1152,7 +1229,7 @@ \framesubtitle{Results} \begin{columns} \begin{column}{0.5\textwidth} - $\alpha=M/N$, $\alpha<1$ is overparameterized + $M$ data points, $N$ parameters, $\alpha=M/N$ \[ V_0=\hat f(J\mid \pmb a) =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p} @@ -1226,11 +1303,11 @@ \begin{column}{0.5\textwidth} \textbf{How do these structures interact with dynamics?} - \medskip + \bigskip \textbf{What structures exist in a problem with a non-random ground truth?} - \medskip + \bigskip \textbf{How do topological properties of solutions correlate with their quality?} \end{column} |