summaryrefslogtreecommitdiff
path: root/ictp-saifr_colloquium.tex
diff options
context:
space:
mode:
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r--ictp-saifr_colloquium.tex273
1 files changed, 3 insertions, 270 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex
index d07ecd3..5df9672 100644
--- a/ictp-saifr_colloquium.tex
+++ b/ictp-saifr_colloquium.tex
@@ -35,7 +35,8 @@
\addbibresource{ictp-saifr_colloquium.bib}
\title{
- Structural barriers to random optimization
+ Fitting with more parameters than data points\\\normalsize
+ Topology of the solutions to overparameterized problems
}
\author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator}
\date{19 February 2025}
@@ -58,280 +59,12 @@
\end{frame}
\begin{frame}
- \frametitle{Overview}
+ \frametitle{Curve fitting: the good, the bad, and the weird}
\begin{columns}
- \begin{column}{\textwidth}
-
- \Large\color{ictpblue}{\textbf{Introduction and background of existing work}}
- \large
-
- \medskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Introduction to random optimization problems}}
-
- \bigskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Worst-case versus typical-case performance}}
-
- \bigskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to typical-case performance of the best algorithms}}
-
- \bigskip
-
- \Large\color{ictpblue}{\textbf{Original contributions}}
- \large
-
- \medskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to performance of common algorithms}}
-
- \bigskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to performance of the most common algorithm}}
-
- \bigskip
-
- \hspace{1em}\color{ictpgreen}{\textbf{Connections to other problems}}
-
- \end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{Complexity of random landscapes}
- \begin{columns}
- \begin{column}{0.4\textwidth}
- Complexity $\Sigma=\frac1N\overline{\log\#_\text{points}}$ describes typical number of stationary points
-
- \bigskip
-
- Complexity of marginal minima crucial for understanding dynamics in mixed $p$-spin models
-
- \bigskip
-
- Lucky accident: natural parameter
- sets type of stationary point
- \end{column}
- \begin{column}{0.6\textwidth}
- \begin{overprint}
- \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2020.png}
- \onslide<2>\includegraphics[width=\textwidth]{figs/folena_2020_2.png}
- \end{overprint}
-
- \smallskip
-
- \tiny\fullcite{Folena_2020_Rethinking}
- \end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{How to count: Kac--Rice}
-
- Number of stationary points with $\nabla H(\pmb x)=0$ given by integral
- over Kac--Rice measure
- \begin{align*}
- \#_\text{points}
- &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big|
- \end{align*}
- Note absolute value of the determinant: want to account for curvature but not add $-1$
-
- \bigskip
-
- Can specify properties of points by inserting $\delta$-functions:
- \begin{align*}
- \#_\text{points}\alert<2>{(E)}
- &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big|
- \alert<2>{\,\delta\big(H(\pmb x)-NE\big)}
- \end{align*}
- How can \emph{marginality} be specified?
-\end{frame}
-
-\begin{frame}
- \frametitle{Hessian shifts and stationary point stability}
- \begin{columns}
- \begin{column}{0.5\textwidth}
- In spherical spin glasses, all points have the same Hessian spectral density: semicircle with radius $\mu_\text m$, but different shifts
- \[
- \mu=\frac1N\operatorname{Tr}\operatorname{Hess} H(\pmb x)
- \]
-
- \bigskip
-
- Condition on marginal minima by inserting
- \[
- \delta\big(\operatorname{Tr}\operatorname{Hess}H(\pmb x)-N\mu_\text{m}\big)
- \]
-
- \medskip
-
- \alert<7>{In generic models, spectral density depends on stationarity, energy, etc!}
- \end{column}
\begin{column}{0.5\textwidth}
- \begin{overlayarea}{\textwidth}{14.5em}
- \only<1-2>{\includegraphics[width=\columnwidth]{figs/mu_0.75.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_0.75.pdf}}
- \only<3>{\includegraphics[width=\columnwidth]{figs/mu_1.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_1.5.pdf}}
- \only<4>{\includegraphics[width=\columnwidth]{figs/mu_2.25.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.25.pdf}}
- \only<5>{\includegraphics[width=\columnwidth]{figs/mu_3.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_3.5.pdf}}
- \only<6>{\includegraphics[width=\columnwidth]{figs/mu_2.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.pdf}}
- \only<7>{\includegraphics[width=0.9\columnwidth]{figs/msg_marg_spectra.pdf}}
- \end{overlayarea}
\end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{Towards generic marginal complexity}
- \begin{columns}
\begin{column}{0.5\textwidth}
- \begin{itemize}[leftmargin=4em]
- \item[\color{ictpgreen}\bf Trick \#1:] condition stationary points on \emph{value of smallest eigenvalue}
- \end{itemize}
- {
- \small
- \begin{align*}
- \hspace{-3em}&\delta(\lambda_\text{min}(A)) \\
- \hspace{-3em}&=\lim_{\beta\to\infty}\int
- \frac{d\pmb s\,\delta(N-\|\pmb s\|^2)e^{-\beta\pmb s^TA\pmb s}}
- {\int d\pmb s'\,\delta(N-\|\pmb s'\|^2)e^{-\beta\pmb s'^TA\pmb s'}}
- \delta\left(\frac{\pmb s^TA\pmb s}N\right)
- \end{align*}
- }
-
- \medskip
-
- \begin{itemize}[leftmargin=4em]
- \item[\color{ictpgreen}\bf Trick \#2:] adjust $\mu\propto\operatorname{Tr}\operatorname{Hess}H$ until order-$N$ large deviation breaks
- \end{itemize}
-
- \bigskip
-
- \tiny
- \fullcite{Kent-Dobias_2024_Conditioning}
-
- \end{column}
- \begin{column}{0.5\textwidth}
- \hspace{0.9em}
- \includegraphics[scale=0.8]{figs/spectrum_less.pdf}
- \hspace{-1.6em}
- \includegraphics[scale=0.8]{figs/spectrum_eq.pdf}
- \hspace{-1.6em}
- \includegraphics[scale=0.8]{figs/spectrum_more.pdf}
- \\
- \includegraphics[scale=0.8]{figs/large_deviation.pdf}
-
- \vspace{-1em}
-
- \small
- \begin{align*}
- \hspace{-0.5em}G_0(\mu)=\frac 1N\log\Big\langle\delta\big(\lambda_\text{min}(A-\lambda\mu)\big)\Big\rangle_{A\in\text{GOE}(N)}
- \end{align*}
- \end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{Marginal complexity: example}
- \begin{columns}
- \begin{column}{0.5\textwidth}
- Example: non-Gaussian landscapes
- \[
- H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2
- \]
- for spherical $\pmb x$ and Gaussian functions $V_i$
- \[
- \overline{V_i(\pmb x)V_j(\pmb x')}=\delta_{ij}f\bigg(\frac{\pmb x\cdot\pmb x'}N\bigg)
- \]
-
- \vspace{-2em}
-
- \begin{overprint}
- \onslide<1-2>\[
- f(q)=\tfrac12q^2+\tfrac12q^3
- \]
- \onslide<3>\[
- f(q)=\kappa q+(1-\kappa)q^2
- \]
- \end{overprint}
-
- \bigskip
-
- \tiny
- \fullcite{Kent-Dobias_2024_Conditioning}
-
- \smallskip
-
- \fullcite{Kent-Dobias_2024_Algorithm-independent}
- \end{column}
- \begin{column}{0.5\textwidth}
- \begin{overprint}
- \onslide<1>\vspace{4em}\includegraphics[width=\textwidth]{figs/most_squares_complex.pdf}
- \onslide<2>\vspace{-1.75em}\includegraphics[width=\textwidth]{figs/most_squares_complexity.pdf}
-
- \vspace{-1.95em}
-
- \hspace{-0.25em}\colorbox{white}{\includegraphics[width=\textwidth]{figs/most_squares_stability.pdf}}
- \onslide<3>\vspace{-1em}
-
- \includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf}
-
- \vspace{-0.4em}
-
- \includegraphics[width=\textwidth]{figs/most_squares_zoom.pdf}
-
- \vspace{1em}
- \end{overprint}
- \end{column}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{Which marginal minima attract the dynamics?}
- \begin{columns}
- \begin{column}{0.5\textwidth}
- `Best case' performance: lowest marginal minima without the \emph{Overlap Gap Property}
-
- \smallskip
-
- \tiny
- \fullcite{Gamarnik_2021-10_The}
-
- \normalsize
- \medskip
-
- `Worst case' performance: ???
-
- \smallskip
- \tiny\fullcite{Folena_2023_On}
-
- \normalsize
- \medskip
-
- \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima}
-
- \smallskip
-
- \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP}
-
- \smallskip
-
- \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate}
-
- \medskip
-
- Gradient descent destination depends on \emph{global} property: basin of attraction size;
- stationary point analysis is only \emph{local}
-
- \medskip
-
- \end{column}
- \begin{column}{0.5\textwidth}
- \begin{overprint}
- \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2023.png}
- \onslide<2>\includegraphics[width=\textwidth]{figs/folena_new_2.pdf}
- \end{overprint}
\end{column}
\end{columns}
\end{frame}