diff options
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 309 |
1 files changed, 165 insertions, 144 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex index e1c78bb..bedb05a 100644 --- a/ictp-saifr_colloquium.tex +++ b/ictp-saifr_colloquium.tex @@ -856,6 +856,10 @@ \medskip \textbf{BUT:} extremely little is known outside of the linear case + + \medskip + + State of the art: sample two points, see if the line between them are also solutions \end{column} \begin{column}{0.5\textwidth} \end{column} @@ -892,6 +896,60 @@ \end{frame} \begin{frame} + \frametitle{A simple model of nonlinear least squares} + \begin{columns} + \begin{column}{0.33\textwidth} + \textbf{When does \boldmath{$\chi^2$} have any solutions?} + + \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that can interfere with optimization?} + \end{column} + \begin{column}{0.33\textwidth} + \textbf{How does gradient descent behave?} + + \textbf{How does stochastic gradient descent behave?} + \end{column} + \begin{column}{0.33\textwidth} + \textbf{What does the manifold of perfect solutions look like?} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{A simple model of nonlinear least squares} + \begin{columns} + \begin{column}{0.65\textwidth} + Solutions form $D=N-M-1$ dimensional manifold: + \[ + \Omega=\left\{ + \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M + \right\} + \] + How to count number of components? We don't know + + \medskip + + Very good at counting isolated minima: + \[ + \#_{\substack{\text{critical}\\\text{points}}}=\int d\pmb x\, + \delta\big(\nabla h(\pmb x)\big)\, + \big|\det\operatorname{Hess}h(\pmb x)\big| + \] + + \smallskip + + \tiny\fullcite{Kent-Dobias_2022_How} + + \normalsize + \medskip + + \textbf{Today:} how to compute the \emph{Euler characteristic} of the solution manifold + \end{column} + \begin{column}{0.35\textwidth} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} \frametitle{The Euler characteristic \boldmath{$\chi$}} \begin{columns} \begin{column}{0.5\textwidth} @@ -937,7 +995,8 @@ \end{frame} \begin{frame} - \frametitle{Characteristics of the Euler characteristic } + \frametitle{The Euler characteristic \boldmath{$\chi$}} + \framesubtitle{Characteristics of the characteristic} \begin{columns} \begin{column}{0.5\textwidth} @@ -977,49 +1036,8 @@ \end{frame} \begin{frame} - \begin{columns} - \begin{column}{0.16\textwidth} - \Large - \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets} - \vspace{11em} - \end{column} - \begin{column}{0.7\textwidth} - \begin{overprint} - \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}} - \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}} - \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}} - \end{overprint} - \end{column} - \begin{column}{0.16\textwidth} - \begin{overprint} - \onslide<2>\centering High energy - - \vspace{0.5em} - - $\chi(\Omega)\ll0$ - - \vspace{0.5em} - - hole\\ - dominated - \onslide<3>\centering Low energy - - \vspace{0.5em} - - $\chi(\Omega)\gg0$ - - \vspace{0.5em} - - component\\ - dominated - \end{overprint} - \vspace{15em} - \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{Computing the Euler characteristic} + \frametitle{The Euler characteristic \boldmath{$\chi$}} + \framesubtitle{Computing the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex @@ -1041,18 +1059,6 @@ +\cdots \\ &=\sum_{i=0}^D(-1)^i\#_\text{index i} \end{align*} - \[ - \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big) - = - \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) - =(-1)^{\text{index}} - \] - \[ - \chi(\Omega) - =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big) - \,\det\operatorname{Hess}h(\pmb x) - \] - \emph{Kac--Rice without the absolute value!} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} @@ -1072,148 +1078,163 @@ \begin{columns} \begin{column}{0.6\textwidth} - Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb - x_0\cdot\pmb x$ for arbitrary $\pmb x_0$. \[ - \chi(\Omega) - =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) + \Omega=\left\{ + \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M + \right\} \] - Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$ - - \bigskip - - Lagrange multipliers replace differential geometry: + Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb a)=\frac1N\pmb + a_0\cdot\pmb a$ for arbitrary $\pmb a_0$ \[ - L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN) + \#_{\substack{\text{critical}\\\text{points}}} + =\int_\Omega d\pmb x\, + \delta\big(\nabla h(\pmb x)\big)\, + \big|\det\operatorname{Hess}h(\pmb x)\big| + \] + \[ + \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big) + = + \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) + =(-1)^{\text{index}} + \] + \[ + \chi(\Omega) + =\sum_{i=0}^D(-1)^i\#_\text{index i} + =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) \] \end{column} \begin{column}{0.4\textwidth} \begin{overprint} - \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png} - \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png} - \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png} + \onslide<1>\includegraphics[width=\textwidth]{figs/function_1.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/function_2.png} + \onslide<3>\includegraphics[width=\textwidth]{figs/function_3.png} \end{overprint} \end{column} \end{columns} - \[ - \chi(\Omega) - =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big) - \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix} - \] -\end{frame} - -\begin{frame} - \begin{columns} - \begin{column}{\textwidth} - \includegraphics[width=\textwidth]{figs/slice.png} - \end{column} - \end{columns} \end{frame} \begin{frame} - \frametitle{Results: \boldmath{$3+s$} mixed spherical models} + \frametitle{A simple model of nonlinear least squares} + \framesubtitle{Results} \begin{columns} \begin{column}{0.5\textwidth} - \begin{align*} - H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\ - +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s} - \end{align*} - - \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} - - \smallskip - - \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} - - \smallskip - - \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} + $\alpha=M/N$, $\alpha<1$ is overparameterized + \[ + V_0=\hat f(J\mid \pmb a) + =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p} + \] + Simplest case: $p=1$, $\Omega$ is intersection of random hyperplanes with the sphere - \smallskip + \medskip - \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign} + Results in $\chi(\Omega)=2$ or $\chi(\Omega)=0$ depending on whether solutions exist - \bigskip + \medskip - \tiny - \fullcite{Kent-Dobias_2024_On} + \hspace{2em}\includegraphics[width=0.33\textwidth]{figs/connected.pdf} + \hfill + \includegraphics[width=0.33\textwidth]{figs/gone.pdf}\hspace{2em} \end{column} \begin{column}{0.5\textwidth} - \includegraphics[width=\textwidth]{figs/folena_new.pdf} + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/intersections_1.pdf} + \onslide<2>\includegraphics[width=\textwidth]{figs/intersections_2.pdf} + \onslide<3>\includegraphics[width=\textwidth]{figs/intersections_3.pdf} + \onslide<4>\includegraphics[width=\textwidth]{figs/intersections_4.pdf} + \onslide<5>\includegraphics[width=\textwidth]{figs/phases_1.pdf} + \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} - \frametitle{Preliminary results: other models?} + \frametitle{A simple model of nonlinear least squares} + \framesubtitle{Results} \begin{columns} \begin{column}{0.5\textwidth} - Example: non-Gaussian landscapes + $\alpha=M/N$, $\alpha<1$ is overparameterized \[ - H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 + V_0=\hat f(J\mid \pmb a) + =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p} \] - for spherical $\pmb x$ and Gaussian functions $V_i$ - \medskip + For $p\geq2$, different phases with $|\chi(\Omega)|\gg1$ with varying sign - $E_\text{sh}$ consistent with gradient descent? More work needed... - - \bigskip\tiny - - \fullcite{Kent-Dobias_2024_Algorithm-independent} - - \smallskip + \medskip - \fullcite{Kent-Dobias_2024_On} + \includegraphics[width=0.23\textwidth]{figs/middle.pdf} + \includegraphics[width=0.23\textwidth]{figs/complex.pdf} + \includegraphics[width=0.23\textwidth]{figs/shattered.pdf} + \includegraphics[width=0.23\textwidth]{figs/gone.pdf} \end{column} \begin{column}{0.5\textwidth} - \vspace{-1em} + \begin{overprint} + \onslide<1>\centering\includegraphics[width=0.8\textwidth]{figs/middle.pdf}\\$\chi(\Omega)\ll0$ + \onslide<2>\centering\includegraphics[width=0.8\textwidth]{figs/complex.pdf}\\$\chi(\Omega)\ll0$ + \onslide<3>\centering\includegraphics[width=0.8\textwidth]{figs/shattered.pdf}\\$\chi(\Omega)\gg0$ + \onslide<4>\includegraphics[width=\textwidth]{figs/phases_2.pdf} + \onslide<5>\includegraphics[width=\textwidth]{figs/phases_3.pdf} + \onslide<6>\includegraphics[width=\textwidth]{figs/phases_4.pdf} + \end{overprint} + \end{column} + \end{columns} +\end{frame} - \begin{overprint} - \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} - \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf} - \end{overprint} +\begin{frame} + \frametitle{A simple model of nonlinear least squares} + \framesubtitle{Results} - \vspace{-0.4em} + \begin{columns} + \begin{column}{1.1\textwidth} + \hspace{1em}Phases for inhomogeneous models: $1-\lambda$ parts linear ($p=1$) plus $\lambda$ quadratic ($p=2$) - \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf} + \medskip + + \includegraphics[width=0.21\columnwidth]{figs/phases_12_0.pdf} + \nolinebreak\hspace{-2.5em} + \includegraphics[width=0.21\columnwidth]{figs/phases_12_1.pdf} + \nolinebreak\hspace{-2.5em} + \includegraphics[width=0.21\columnwidth]{figs/phases_12_2.pdf} + \nolinebreak\hspace{-2.5em} + \includegraphics[width=0.21\columnwidth]{figs/phases_12_3.pdf} + \nolinebreak\hspace{-2.5em} + \includegraphics[width=0.21\columnwidth]{figs/phases_12_4.pdf} + \nolinebreak\hspace{-2.5em} + \includegraphics[width=0.21\columnwidth]{figs/phases_12_5.pdf} - \vspace{1em} + \medskip + + \hspace{5em} + \includegraphics[width=0.13\textwidth]{figs/connected.pdf} + \hfill + \includegraphics[width=0.13\textwidth]{figs/middle.pdf} + \hfill + \includegraphics[width=0.13\textwidth]{figs/complex.pdf} + \hfill + \includegraphics[width=0.13\textwidth]{figs/shattered.pdf} + \hfill + \includegraphics[width=0.13\textwidth]{figs/gone.pdf} + \hspace{5em} \end{column} \end{columns} \end{frame} \begin{frame} - \frametitle{Outlook, other applications, future directions} + \frametitle{A simple model of nonlinear least squares} + \framesubtitle{Conclusions and future directions} \begin{columns} \begin{column}{0.5\textwidth} - Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that - \[ - V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N - \] - for independent Gaussian $V_i$ + \textbf{How do these structures interact with dynamics?} \medskip - \tiny - \fullcite{Kent-Dobias_2024_On} - - \bigskip\normalsize - - \textcolor{ictpgreen}{\textbf{To Do:}} - - Resolve GD question: better DMFT, direct reasoning for relationship to topology + \textbf{What structures exist in a problem with a non-random ground truth?} \medskip - Extend topological arguments beyond GD + \textbf{How do topological properties of solutions correlate with their quality?} \end{column} \begin{column}{0.5\textwidth} - \includegraphics[width=\textwidth]{figs/spheres.png} - - \medskip - - \includegraphics[width=\textwidth]{figs/phases.png} \end{column} \end{columns} \end{frame} |