1 files changed, 165 insertions, 144 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex
index e1c78bb..bedb05a 100644
--- a/ictp-saifr_colloquium.tex
+++ b/ictp-saifr_colloquium.tex
@@ -856,6 +856,10 @@
       \medskip
 
       \textbf{BUT:} extremely little is known outside of the linear case
+
+      \medskip
+
+      State of the art: sample two points, see if the line between them are also solutions
     \end{column}
     \begin{column}{0.5\textwidth}
     \end{column}
@@ -892,6 +896,60 @@
 \end{frame}
 
 \begin{frame}
+  \frametitle{A simple model of nonlinear least squares}
+  \begin{columns}
+    \begin{column}{0.33\textwidth}
+      \textbf{When does \boldmath{$\chi^2$} have any solutions?}
+
+      \textbf{When does \boldmath{$\chi^2$} have suboptimal minima that can interfere with optimization?}
+    \end{column}
+    \begin{column}{0.33\textwidth}
+      \textbf{How does gradient descent behave?}
+
+      \textbf{How does stochastic gradient descent behave?}
+    \end{column}
+    \begin{column}{0.33\textwidth}
+      \textbf{What does the manifold of perfect solutions look like?}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}
+  \frametitle{A simple model of nonlinear least squares}
+  \begin{columns}
+    \begin{column}{0.65\textwidth}
+      Solutions form $D=N-M-1$ dimensional manifold:
+      \[
+        \Omega=\left\{
+          \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M
+        \right\}
+      \]
+      How to count number of components? We don't know
+
+      \medskip
+
+      Very good at counting isolated minima:
+      \[
+        \#_{\substack{\text{critical}\\\text{points}}}=\int d\pmb x\,
+        \delta\big(\nabla h(\pmb x)\big)\,
+        \big|\det\operatorname{Hess}h(\pmb x)\big|
+      \]
+
+      \smallskip
+
+      \tiny\fullcite{Kent-Dobias_2022_How}
+
+      \normalsize
+      \medskip
+
+      \textbf{Today:} how to compute the \emph{Euler characteristic} of the solution manifold
+    \end{column}
+    \begin{column}{0.35\textwidth}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}
   \frametitle{The Euler characteristic \boldmath{$\chi$}}
   \begin{columns}
     \begin{column}{0.5\textwidth}
@@ -937,7 +995,8 @@
 \end{frame}
 
 \begin{frame}
-  \frametitle{Characteristics of the Euler characteristic }
+  \frametitle{The Euler characteristic \boldmath{$\chi$}}
+  \framesubtitle{Characteristics of the characteristic}
 
   \begin{columns}
     \begin{column}{0.5\textwidth}
@@ -977,49 +1036,8 @@
 \end{frame}
 
 \begin{frame}
-  \begin{columns}
-    \begin{column}{0.16\textwidth}
-      \Large
-      \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets}
-      \vspace{11em}
-    \end{column}
-    \begin{column}{0.7\textwidth}
-      \begin{overprint}
-        \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}}
-        \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}}
-        \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}}
-      \end{overprint}
-    \end{column}
-    \begin{column}{0.16\textwidth}
-      \begin{overprint}
-        \onslide<2>\centering High energy
-
-        \vspace{0.5em}
-
-        $\chi(\Omega)\ll0$
-
-        \vspace{0.5em}
-
-        hole\\
-        dominated
-        \onslide<3>\centering Low energy
-
-        \vspace{0.5em}
-
-        $\chi(\Omega)\gg0$
-
-        \vspace{0.5em}
-
-        component\\
-        dominated
-      \end{overprint}
-      \vspace{15em}
-    \end{column}
-  \end{columns}
-\end{frame}
-
-\begin{frame}
-  \frametitle{Computing the Euler characteristic}
+  \frametitle{The Euler characteristic \boldmath{$\chi$}}
+  \framesubtitle{Computing the Euler characteristic}
   \begin{columns}
     \begin{column}{0.5\textwidth}
       Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex
@@ -1041,18 +1059,6 @@
         +\cdots \\
         &=\sum_{i=0}^D(-1)^i\#_\text{index i}
       \end{align*}
-      \[
-        \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big)
-        =
-        \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right)
-        =(-1)^{\text{index}}
-      \]
-      \[
-        \chi(\Omega)
-        =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)
-        \,\det\operatorname{Hess}h(\pmb x)
-      \]
-      \emph{Kac--Rice without the absolute value!}
     \end{column}
     \begin{column}{0.5\textwidth}
       \begin{overprint}
@@ -1072,148 +1078,163 @@
 
   \begin{columns}
     \begin{column}{0.6\textwidth}
-      Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb
-      x_0\cdot\pmb x$ for arbitrary $\pmb x_0$.
       \[
-        \chi(\Omega)
-        =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x)
+        \Omega=\left\{
+          \pmb a\in\mathbb R^N\mid\|\pmb a\|^2=N, V_0=\hat f(J^i\mid\pmb a)\;\forall\;1\leq i\leq M
+        \right\}
       \]
-      Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$
-
-      \bigskip
-
-      Lagrange multipliers replace differential geometry:
+      Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb a)=\frac1N\pmb
+      a_0\cdot\pmb a$ for arbitrary $\pmb a_0$
       \[
-        L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN)
+        \#_{\substack{\text{critical}\\\text{points}}}
+        =\int_\Omega d\pmb x\,
+        \delta\big(\nabla h(\pmb x)\big)\,
+        \big|\det\operatorname{Hess}h(\pmb x)\big|
+      \]
+      \[
+        \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big)
+        =
+        \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right)
+        =(-1)^{\text{index}}
+      \]
+      \[
+        \chi(\Omega)
+        =\sum_{i=0}^D(-1)^i\#_\text{index i}
+        =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x)
       \]
     \end{column}
     \begin{column}{0.4\textwidth}
       \begin{overprint}
-        \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png}
-        \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png}
-        \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png}
+        \onslide<1>\includegraphics[width=\textwidth]{figs/function_1.png}
+        \onslide<2>\includegraphics[width=\textwidth]{figs/function_2.png}
+        \onslide<3>\includegraphics[width=\textwidth]{figs/function_3.png}
       \end{overprint}
     \end{column}
   \end{columns}
-      \[
-        \chi(\Omega)
-        =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big)
-        \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix}
-      \]
-\end{frame}
-
-\begin{frame}
-  \begin{columns}
-    \begin{column}{\textwidth}
-      \includegraphics[width=\textwidth]{figs/slice.png}
-    \end{column}
-  \end{columns}
 \end{frame}
 
 \begin{frame}
-  \frametitle{Results: \boldmath{$3+s$} mixed spherical models}
+  \frametitle{A simple model of nonlinear least squares}
+  \framesubtitle{Results}
   \begin{columns}
     \begin{column}{0.5\textwidth}
-      \begin{align*}
-        H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\
-        +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s}
-      \end{align*}
-
-      \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima}
-
-      \smallskip
-
-      \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP}
-
-      \smallskip
-
-      \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate}
+      $\alpha=M/N$, $\alpha<1$ is overparameterized
+      \[
+        V_0=\hat f(J\mid \pmb a)
+        =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
+      \]
+      Simplest case: $p=1$, $\Omega$ is intersection of random hyperplanes with the sphere
 
-      \smallskip
+      \medskip
 
-      \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign}
+      Results in $\chi(\Omega)=2$ or $\chi(\Omega)=0$ depending on whether solutions exist
 
-      \bigskip
+      \medskip
 
-      \tiny
-      \fullcite{Kent-Dobias_2024_On}
+      \hspace{2em}\includegraphics[width=0.33\textwidth]{figs/connected.pdf}
+      \hfill
+      \includegraphics[width=0.33\textwidth]{figs/gone.pdf}\hspace{2em}
     \end{column}
     \begin{column}{0.5\textwidth}
-      \includegraphics[width=\textwidth]{figs/folena_new.pdf}
+      \begin{overprint}
+        \onslide<1>\includegraphics[width=\textwidth]{figs/intersections_1.pdf}
+        \onslide<2>\includegraphics[width=\textwidth]{figs/intersections_2.pdf}
+        \onslide<3>\includegraphics[width=\textwidth]{figs/intersections_3.pdf}
+        \onslide<4>\includegraphics[width=\textwidth]{figs/intersections_4.pdf}
+        \onslide<5>\includegraphics[width=\textwidth]{figs/phases_1.pdf}
+      \end{overprint}
     \end{column}
   \end{columns}
 \end{frame}
 
 \begin{frame}
-  \frametitle{Preliminary results: other models?}
+  \frametitle{A simple model of nonlinear least squares}
+  \framesubtitle{Results}
   \begin{columns}
     \begin{column}{0.5\textwidth}
-      Example: non-Gaussian landscapes
+      $\alpha=M/N$, $\alpha<1$ is overparameterized
       \[
-        H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2
+        V_0=\hat f(J\mid \pmb a)
+        =\sum_{i_1=1}^N\cdots\sum_{i_p=1}^NJ_{i_1,\ldots,i_p}a_{i_1}\cdots a_{i_p}
       \]
-      for spherical $\pmb x$ and Gaussian functions $V_i$
 
-      \medskip
+      For $p\geq2$, different phases with $|\chi(\Omega)|\gg1$ with varying sign
 
-      $E_\text{sh}$ consistent with gradient descent? More work needed...
-
-      \bigskip\tiny
-
-      \fullcite{Kent-Dobias_2024_Algorithm-independent}
-
-      \smallskip
+      \medskip
 
-      \fullcite{Kent-Dobias_2024_On}
+      \includegraphics[width=0.23\textwidth]{figs/middle.pdf}
+      \includegraphics[width=0.23\textwidth]{figs/complex.pdf}
+      \includegraphics[width=0.23\textwidth]{figs/shattered.pdf}
+      \includegraphics[width=0.23\textwidth]{figs/gone.pdf}
     \end{column}
     \begin{column}{0.5\textwidth}
-        \vspace{-1em}
+      \begin{overprint}
+        \onslide<1>\centering\includegraphics[width=0.8\textwidth]{figs/middle.pdf}\\$\chi(\Omega)\ll0$
+        \onslide<2>\centering\includegraphics[width=0.8\textwidth]{figs/complex.pdf}\\$\chi(\Omega)\ll0$
+        \onslide<3>\centering\includegraphics[width=0.8\textwidth]{figs/shattered.pdf}\\$\chi(\Omega)\gg0$
+        \onslide<4>\includegraphics[width=\textwidth]{figs/phases_2.pdf}
+        \onslide<5>\includegraphics[width=\textwidth]{figs/phases_3.pdf}
+        \onslide<6>\includegraphics[width=\textwidth]{figs/phases_4.pdf}
+      \end{overprint}
+    \end{column}
+  \end{columns}
+\end{frame}
 
-        \begin{overprint}
-          \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf}
-          \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf}
-        \end{overprint}
+\begin{frame}
+  \frametitle{A simple model of nonlinear least squares}
+  \framesubtitle{Results}
 
-        \vspace{-0.4em}
+  \begin{columns}
+    \begin{column}{1.1\textwidth}
+      \hspace{1em}Phases for inhomogeneous models: $1-\lambda$ parts linear ($p=1$) plus $\lambda$ quadratic ($p=2$)
 
-        \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf}
+      \medskip
+
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_0.pdf}
+      \nolinebreak\hspace{-2.5em}
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_1.pdf}
+      \nolinebreak\hspace{-2.5em}
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_2.pdf}
+      \nolinebreak\hspace{-2.5em}
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_3.pdf}
+      \nolinebreak\hspace{-2.5em}
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_4.pdf}
+      \nolinebreak\hspace{-2.5em}
+      \includegraphics[width=0.21\columnwidth]{figs/phases_12_5.pdf}
 
-        \vspace{1em}
+      \medskip
+
+      \hspace{5em}
+      \includegraphics[width=0.13\textwidth]{figs/connected.pdf}
+      \hfill
+      \includegraphics[width=0.13\textwidth]{figs/middle.pdf}
+      \hfill
+      \includegraphics[width=0.13\textwidth]{figs/complex.pdf}
+      \hfill
+      \includegraphics[width=0.13\textwidth]{figs/shattered.pdf}
+      \hfill
+      \includegraphics[width=0.13\textwidth]{figs/gone.pdf}
+      \hspace{5em}
     \end{column}
   \end{columns}
 \end{frame}
 
 \begin{frame}
-  \frametitle{Outlook, other applications, future directions}
+  \frametitle{A simple model of nonlinear least squares}
+  \framesubtitle{Conclusions and future directions}
   \begin{columns}
     \begin{column}{0.5\textwidth}
-      Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that
-      \[
-        V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N
-      \]
-      for independent Gaussian $V_i$
+      \textbf{How do these structures interact with dynamics?}
 
       \medskip
 
-      \tiny
-      \fullcite{Kent-Dobias_2024_On}
-
-      \bigskip\normalsize
-
-      \textcolor{ictpgreen}{\textbf{To Do:}}
-
-      Resolve GD question: better DMFT, direct reasoning for relationship to topology
+      \textbf{What structures exist in a problem with a non-random ground truth?}
 
       \medskip
 
-      Extend topological arguments beyond GD
+      \textbf{How do topological properties of solutions correlate with their quality?}
     \end{column}
     \begin{column}{0.5\textwidth}
-      \includegraphics[width=\textwidth]{figs/spheres.png}
-
-      \medskip
-
-      \includegraphics[width=\textwidth]{figs/phases.png}
     \end{column}
   \end{columns}
 \end{frame}