summaryrefslogtreecommitdiff
path: root/ictp-saifr_colloquium.tex
diff options
context:
space:
mode:
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r--ictp-saifr_colloquium.tex309
1 files changed, 165 insertions, 144 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex
index e1c78bb..bedb05a 100644
--- a/ictp-saifr_colloquium.tex
+++ b/ictp-saifr_colloquium.tex
@@ -856,6 +856,10 @@
\medskip
\textbf{BUT:} extremely little is known outside of the linear case
+
+ \medskip
+
+ State of the art: sample two points, see if the line between them are also solutions
\end{column}
\begin{column}{0.5\textwidth}
\end{column}
@@ -892,6 +896,60 @@
\end{frame}
\begin{frame}
+ \frametitle{A simple model of nonlinear least squares}
+ \begin{columns}
+ \begin{column}{0.33\textwidth}
+ \textbf{When does \boldmath{$\chi^2$} have any solutions?}
+
+ \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that can interfere with optimization?}
+ \end{column}
+ \begin{column}{0.33\textwidth}
+ \textbf{How does gradient descent behave?}
+
+ \textbf{How does stochastic gradient descent behave?}
+ \end{column}
+ \begin{column}{0.33\textwidth}
+ \textbf{What does the manifold of perfect solutions look like?}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}
+ \frametitle{A simple model of nonlinear least squares}
+ \begin{columns}
+ \begin{column}{0.65\textwidth}
+ Solutions form $D=N-M-1$ dimensional manifold:
+ \[
+ \Omega=\left\{
+ \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M
+ \right\}
+ \]
+ How to count number of components? We don't know
+
+ \medskip
+
+ Very good at counting isolated minima:
+ \[
+ \#_{\substack{\text{critical}\\\text{points}}}=\int d\pmb x\,
+ \delta\big(\nabla h(\pmb x)\big)\,
+ \big|\det\operatorname{Hess}h(\pmb x)\big|
+ \]
+
+ \smallskip
+
+ \tiny\fullcite{Kent-Dobias_2022_How}
+
+ \normalsize
+ \medskip
+
+ \textbf{Today:} how to compute the \emph{Euler characteristic} of the solution manifold
+ \end{column}
+ \begin{column}{0.35\textwidth}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}
\frametitle{The Euler characteristic \boldmath{$\chi$}}
\begin{columns}
\begin{column}{0.5\textwidth}
@@ -937,7 +995,8 @@
\end{frame}
\begin{frame}
- \frametitle{Characteristics of the Euler characteristic }
+ \frametitle{The Euler characteristic \boldmath{$\chi$}}
+ \framesubtitle{Characteristics of the characteristic}
\begin{columns}
\begin{column}{0.5\textwidth}
@@ -977,49 +1036,8 @@
\end{frame}
\begin{frame}
- \begin{columns}
- \begin{column}{0.16\textwidth}
- \Large
- \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets}
- \vspace{11em}
- \end{column}
- \begin{column}{0.7\textwidth}
- \begin{overprint}
- \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}}
- \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}}
- \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}}
- \end{overprint}
- \end{column}
- \begin{column}{0.16\textwidth}
- \begin{overprint}
- \onslide<2>\centering High energy
-
- \vspace{0.5em}
-
- $\chi(\Omega)\ll0$
-
- \vspace{0.5em}
-
- hole\\
- dominated
- \onslide<3>\centering Low energy
-
- \vspace{0.5em}
-
- $\chi(\Omega)\gg0$
-
- \vspace{0.5em}
-
- component\\
- dominated
- \end{overprint}
- \vspace{15em}
- \end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{Computing the Euler characteristic}
+ \frametitle{The Euler characteristic \boldmath{$\chi$}}
+ \framesubtitle{Computing the Euler characteristic}
\begin{columns}
\begin{column}{0.5\textwidth}
Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex
@@ -1041,18 +1059,6 @@
+\cdots \\
&=\sum_{i=0}^D(-1)^i\#_\text{index i}
\end{align*}
- \[
- \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big)
- =
- \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right)
- =(-1)^{\text{index}}
- \]
- \[
- \chi(\Omega)
- =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)
- \,\det\operatorname{Hess}h(\pmb x)
- \]
- \emph{Kac--Rice without the absolute value!}
\end{column}
\begin{column}{0.5\textwidth}
\begin{overprint}
@@ -1072,148 +1078,163 @@
\begin{columns}
\begin{column}{0.6\textwidth}
- Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb
- x_0\cdot\pmb x$ for arbitrary $\pmb x_0$.
\[
- \chi(\Omega)
- =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x)
+ \Omega=\left\{
+ \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M
+ \right\}
\]
- Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$
-
- \bigskip
-
- Lagrange multipliers replace differential geometry:
+ Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb a)=\frac1N\pmb
+ a_0\cdot\pmb a$ for arbitrary $\pmb a_0$
\[
- L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN)
+ \#_{\substack{\text{critical}\\\text{points}}}
+ =\int_\Omega d\pmb x\,
+ \delta\big(\nabla h(\pmb x)\big)\,
+ \big|\det\operatorname{Hess}h(\pmb x)\big|
+ \]
+ \[
+ \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big)
+ =
+ \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right)
+ =(-1)^{\text{index}}
+ \]
+ \[
+ \chi(\Omega)
+ =\sum_{i=0}^D(-1)^i\#_\text{index i}
+ =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x)
\]
\end{column}
\begin{column}{0.4\textwidth}
\begin{overprint}
- \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png}
- \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png}
- \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png}
+ \onslide<1>\includegraphics[width=\textwidth]{figs/function_1.png}
+ \onslide<2>\includegraphics[width=\textwidth]{figs/function_2.png}
+ \onslide<3>\includegraphics[width=\textwidth]{figs/function_3.png}
\end{overprint}
\end{column}
\end{columns}
- \[
- \chi(\Omega)
- =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big)
- \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix}
- \]
-\end{frame}
-
-\begin{frame}
- \begin{columns}
- \begin{column}{\textwidth}
- \includegraphics[width=\textwidth]{figs/slice.png}
- \end{column}
- \end{columns}
\end{frame}
\begin{frame}
- \frametitle{Results: \boldmath{$3+s$} mixed spherical models}
+ \frametitle{A simple model of nonlinear least squares}
+ \framesubtitle{Results}
\begin{columns}
\begin{column}{0.5\textwidth}
- \begin{align*}
- H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\
- +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s}
- \end{align*}
-
- \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima}
-
- \smallskip
-
- \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP}
-
- \smallskip
-
- \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate}
+ $\alpha=M/N$, $\alpha<1$ is overparameterized
+ \[
+ V_0=\hat f(J\mid \pmb a)
+ =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
+ \]
+ Simplest case: $p=1$, $\Omega$ is intersection of random hyperplanes with the sphere
- \smallskip
+ \medskip
- \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign}
+ Results in $\chi(\Omega)=2$ or $\chi(\Omega)=0$ depending on whether solutions exist
- \bigskip
+ \medskip
- \tiny
- \fullcite{Kent-Dobias_2024_On}
+ \hspace{2em}\includegraphics[width=0.33\textwidth]{figs/connected.pdf}
+ \hfill
+ \includegraphics[width=0.33\textwidth]{figs/gone.pdf}\hspace{2em}
\end{column}
\begin{column}{0.5\textwidth}
- \includegraphics[width=\textwidth]{figs/folena_new.pdf}
+ \begin{overprint}
+ \onslide<1>\includegraphics[width=\textwidth]{figs/intersections_1.pdf}
+ \onslide<2>\includegraphics[width=\textwidth]{figs/intersections_2.pdf}
+ \onslide<3>\includegraphics[width=\textwidth]{figs/intersections_3.pdf}
+ \onslide<4>\includegraphics[width=\textwidth]{figs/intersections_4.pdf}
+ \onslide<5>\includegraphics[width=\textwidth]{figs/phases_1.pdf}
+ \end{overprint}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
- \frametitle{Preliminary results: other models?}
+ \frametitle{A simple model of nonlinear least squares}
+ \framesubtitle{Results}
\begin{columns}
\begin{column}{0.5\textwidth}
- Example: non-Gaussian landscapes
+ $\alpha=M/N$, $\alpha<1$ is overparameterized
\[
- H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2
+ V_0=\hat f(J\mid \pmb a)
+ =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
\]
- for spherical $\pmb x$ and Gaussian functions $V_i$
- \medskip
+ For $p\geq2$, different phases with $|\chi(\Omega)|\gg1$ with varying sign
- $E_\text{sh}$ consistent with gradient descent? More work needed...
-
- \bigskip\tiny
-
- \fullcite{Kent-Dobias_2024_Algorithm-independent}
-
- \smallskip
+ \medskip
- \fullcite{Kent-Dobias_2024_On}
+ \includegraphics[width=0.23\textwidth]{figs/middle.pdf}
+ \includegraphics[width=0.23\textwidth]{figs/complex.pdf}
+ \includegraphics[width=0.23\textwidth]{figs/shattered.pdf}
+ \includegraphics[width=0.23\textwidth]{figs/gone.pdf}
\end{column}
\begin{column}{0.5\textwidth}
- \vspace{-1em}
+ \begin{overprint}
+ \onslide<1>\centering\includegraphics[width=0.8\textwidth]{figs/middle.pdf}\\$\chi(\Omega)\ll0$
+ \onslide<2>\centering\includegraphics[width=0.8\textwidth]{figs/complex.pdf}\\$\chi(\Omega)\ll0$
+ \onslide<3>\centering\includegraphics[width=0.8\textwidth]{figs/shattered.pdf}\\$\chi(\Omega)\gg0$
+ \onslide<4>\includegraphics[width=\textwidth]{figs/phases_2.pdf}
+ \onslide<5>\includegraphics[width=\textwidth]{figs/phases_3.pdf}
+ \onslide<6>\includegraphics[width=\textwidth]{figs/phases_4.pdf}
+ \end{overprint}
+ \end{column}
+ \end{columns}
+\end{frame}
- \begin{overprint}
- \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf}
- \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf}
- \end{overprint}
+\begin{frame}
+ \frametitle{A simple model of nonlinear least squares}
+ \framesubtitle{Results}
- \vspace{-0.4em}
+ \begin{columns}
+ \begin{column}{1.1\textwidth}
+ \hspace{1em}Phases for inhomogeneous models: $1-\lambda$ parts linear ($p=1$) plus $\lambda$ quadratic ($p=2$)
- \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf}
+ \medskip
+
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_0.pdf}
+ \nolinebreak\hspace{-2.5em}
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_1.pdf}
+ \nolinebreak\hspace{-2.5em}
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_2.pdf}
+ \nolinebreak\hspace{-2.5em}
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_3.pdf}
+ \nolinebreak\hspace{-2.5em}
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_4.pdf}
+ \nolinebreak\hspace{-2.5em}
+ \includegraphics[width=0.21\columnwidth]{figs/phases_12_5.pdf}
- \vspace{1em}
+ \medskip
+
+ \hspace{5em}
+ \includegraphics[width=0.13\textwidth]{figs/connected.pdf}
+ \hfill
+ \includegraphics[width=0.13\textwidth]{figs/middle.pdf}
+ \hfill
+ \includegraphics[width=0.13\textwidth]{figs/complex.pdf}
+ \hfill
+ \includegraphics[width=0.13\textwidth]{figs/shattered.pdf}
+ \hfill
+ \includegraphics[width=0.13\textwidth]{figs/gone.pdf}
+ \hspace{5em}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
- \frametitle{Outlook, other applications, future directions}
+ \frametitle{A simple model of nonlinear least squares}
+ \framesubtitle{Conclusions and future directions}
\begin{columns}
\begin{column}{0.5\textwidth}
- Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that
- \[
- V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N
- \]
- for independent Gaussian $V_i$
+ \textbf{How do these structures interact with dynamics?}
\medskip
- \tiny
- \fullcite{Kent-Dobias_2024_On}
-
- \bigskip\normalsize
-
- \textcolor{ictpgreen}{\textbf{To Do:}}
-
- Resolve GD question: better DMFT, direct reasoning for relationship to topology
+ \textbf{What structures exist in a problem with a non-random ground truth?}
\medskip
- Extend topological arguments beyond GD
+ \textbf{How do topological properties of solutions correlate with their quality?}
\end{column}
\begin{column}{0.5\textwidth}
- \includegraphics[width=\textwidth]{figs/spheres.png}
-
- \medskip
-
- \includegraphics[width=\textwidth]{figs/phases.png}
\end{column}
\end{columns}
\end{frame}